// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
9 | |
10 | use log::warn; |
11 | pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; |
12 | use markup5ever::{local_name, namespace_url, ns}; |
13 | use std::default::Default; |
14 | use std::io::{self, Write}; |
15 | |
16 | use crate::{LocalName, QualName}; |
17 | |
18 | pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> |
19 | where |
20 | Wr: Write, |
21 | T: Serialize, |
22 | { |
23 | let mut ser: HtmlSerializer = HtmlSerializer::new(writer, opts.clone()); |
24 | node.serialize(&mut ser, opts.traversal_scope) |
25 | } |
26 | |
27 | #[derive (Clone)] |
28 | pub struct SerializeOpts { |
29 | /// Is scripting enabled? |
30 | pub scripting_enabled: bool, |
31 | |
32 | /// Serialize the root node? Default: ChildrenOnly |
33 | pub traversal_scope: TraversalScope, |
34 | |
35 | /// If the serializer is asked to serialize an invalid tree, the default |
36 | /// behavior is to panic in the event that an `end_elem` is created without a |
37 | /// matching `start_elem`. Setting this to true will prevent those panics by |
38 | /// creating a default parent on the element stack. No extra start elem will |
39 | /// actually be written. Default: false |
40 | pub create_missing_parent: bool, |
41 | } |
42 | |
43 | impl Default for SerializeOpts { |
44 | fn default() -> SerializeOpts { |
45 | SerializeOpts { |
46 | scripting_enabled: true, |
47 | traversal_scope: TraversalScope::ChildrenOnly(None), |
48 | create_missing_parent: false, |
49 | } |
50 | } |
51 | } |
52 | |
53 | #[derive (Default)] |
54 | struct ElemInfo { |
55 | html_name: Option<LocalName>, |
56 | ignore_children: bool |
57 | } |
58 | |
59 | pub struct HtmlSerializer<Wr: Write> { |
60 | pub writer: Wr, |
61 | opts: SerializeOpts, |
62 | stack: Vec<ElemInfo>, |
63 | } |
64 | |
65 | fn tagname(name: &QualName) -> LocalName { |
66 | match name.ns { |
67 | ns!(html) | ns!(mathml) | ns!(svg) => (), |
68 | ref ns: &{unknown} => { |
69 | // FIXME(#122) |
70 | warn!("node with weird namespace {:?}" , ns); |
71 | }, |
72 | } |
73 | |
74 | name.local.clone() |
75 | } |
76 | |
77 | impl<Wr: Write> HtmlSerializer<Wr> { |
78 | pub fn new(writer: Wr, opts: SerializeOpts) -> Self { |
79 | let html_name = match opts.traversal_scope { |
80 | TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, |
81 | TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), |
82 | }; |
83 | HtmlSerializer { |
84 | writer, |
85 | opts, |
86 | stack: vec![ElemInfo { |
87 | html_name, |
88 | ignore_children: false, |
89 | }], |
90 | } |
91 | } |
92 | |
93 | fn parent(&mut self) -> &mut ElemInfo { |
94 | if self.stack.is_empty() { |
95 | if self.opts.create_missing_parent { |
96 | warn!("ElemInfo stack empty, creating new parent" ); |
97 | self.stack.push(Default::default()); |
98 | } else { |
99 | panic!("no parent ElemInfo" ) |
100 | } |
101 | } |
102 | self.stack.last_mut().unwrap() |
103 | } |
104 | |
105 | fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> { |
106 | for c in text.chars() { |
107 | match c { |
108 | '&' => self.writer.write_all(b"&" ), |
109 | ' \u{00A0}' => self.writer.write_all(b" " ), |
110 | '"' if attr_mode => self.writer.write_all(b""" ), |
111 | '<' if !attr_mode => self.writer.write_all(b"<" ), |
112 | '>' if !attr_mode => self.writer.write_all(b">" ), |
113 | c => self.writer.write_fmt(format_args!(" {}" , c)), |
114 | }?; |
115 | } |
116 | Ok(()) |
117 | } |
118 | } |
119 | |
120 | impl<Wr: Write> Serializer for HtmlSerializer<Wr> { |
121 | fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> |
122 | where |
123 | AttrIter: Iterator<Item = AttrRef<'a>>, |
124 | { |
125 | let html_name = match name.ns { |
126 | ns!(html) => Some(name.local.clone()), |
127 | _ => None, |
128 | }; |
129 | |
130 | if self.parent().ignore_children { |
131 | self.stack.push(ElemInfo { |
132 | html_name, |
133 | ignore_children: true, |
134 | }); |
135 | return Ok(()); |
136 | } |
137 | |
138 | self.writer.write_all(b"<" )?; |
139 | self.writer.write_all(tagname(&name).as_bytes())?; |
140 | for (name, value) in attrs { |
141 | self.writer.write_all(b" " )?; |
142 | |
143 | match name.ns { |
144 | ns!() => (), |
145 | ns!(xml) => self.writer.write_all(b"xml:" )?, |
146 | ns!(xmlns) => { |
147 | if name.local != local_name!("xmlns" ) { |
148 | self.writer.write_all(b"xmlns:" )?; |
149 | } |
150 | }, |
151 | ns!(xlink) => self.writer.write_all(b"xlink:" )?, |
152 | ref ns => { |
153 | // FIXME(#122) |
154 | warn!("attr with weird namespace {:?}" , ns); |
155 | self.writer.write_all(b"unknown_namespace:" )?; |
156 | }, |
157 | } |
158 | |
159 | self.writer.write_all(name.local.as_bytes())?; |
160 | self.writer.write_all(b"= \"" )?; |
161 | self.write_escaped(value, true)?; |
162 | self.writer.write_all(b" \"" )?; |
163 | } |
164 | self.writer.write_all(b">" )?; |
165 | |
166 | let ignore_children = name.ns == ns!(html) && |
167 | match name.local { |
168 | local_name!("area" ) | |
169 | local_name!("base" ) | |
170 | local_name!("basefont" ) | |
171 | local_name!("bgsound" ) | |
172 | local_name!("br" ) | |
173 | local_name!("col" ) | |
174 | local_name!("embed" ) | |
175 | local_name!("frame" ) | |
176 | local_name!("hr" ) | |
177 | local_name!("img" ) | |
178 | local_name!("input" ) | |
179 | local_name!("keygen" ) | |
180 | local_name!("link" ) | |
181 | local_name!("meta" ) | |
182 | local_name!("param" ) | |
183 | local_name!("source" ) | |
184 | local_name!("track" ) | |
185 | local_name!("wbr" ) => true, |
186 | _ => false, |
187 | }; |
188 | |
189 | self.stack.push(ElemInfo { |
190 | html_name, |
191 | ignore_children, |
192 | }); |
193 | |
194 | Ok(()) |
195 | } |
196 | |
197 | fn end_elem(&mut self, name: QualName) -> io::Result<()> { |
198 | let info = match self.stack.pop() { |
199 | Some(info) => info, |
200 | None if self.opts.create_missing_parent => { |
201 | warn!("missing ElemInfo, creating default." ); |
202 | Default::default() |
203 | }, |
204 | _ => panic!("no ElemInfo" ), |
205 | }; |
206 | if info.ignore_children { |
207 | return Ok(()); |
208 | } |
209 | |
210 | self.writer.write_all(b"</" )?; |
211 | self.writer.write_all(tagname(&name).as_bytes())?; |
212 | self.writer.write_all(b">" ) |
213 | } |
214 | |
215 | fn write_text(&mut self, text: &str) -> io::Result<()> { |
216 | let escape = match self.parent().html_name { |
217 | Some(local_name!("style" )) | |
218 | Some(local_name!("script" )) | |
219 | Some(local_name!("xmp" )) | |
220 | Some(local_name!("iframe" )) | |
221 | Some(local_name!("noembed" )) | |
222 | Some(local_name!("noframes" )) | |
223 | Some(local_name!("plaintext" )) => false, |
224 | |
225 | Some(local_name!("noscript" )) => !self.opts.scripting_enabled, |
226 | |
227 | _ => true, |
228 | }; |
229 | |
230 | if escape { |
231 | self.write_escaped(text, false) |
232 | } else { |
233 | self.writer.write_all(text.as_bytes()) |
234 | } |
235 | } |
236 | |
237 | fn write_comment(&mut self, text: &str) -> io::Result<()> { |
238 | self.writer.write_all(b"<!--" )?; |
239 | self.writer.write_all(text.as_bytes())?; |
240 | self.writer.write_all(b"-->" ) |
241 | } |
242 | |
243 | fn write_doctype(&mut self, name: &str) -> io::Result<()> { |
244 | self.writer.write_all(b"<!DOCTYPE " )?; |
245 | self.writer.write_all(name.as_bytes())?; |
246 | self.writer.write_all(b">" ) |
247 | } |
248 | |
249 | fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> { |
250 | self.writer.write_all(b"<?" )?; |
251 | self.writer.write_all(target.as_bytes())?; |
252 | self.writer.write_all(b" " )?; |
253 | self.writer.write_all(data.as_bytes())?; |
254 | self.writer.write_all(b">" ) |
255 | } |
256 | } |
257 | |