1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | use log::warn; |
11 | pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; |
12 | use markup5ever::{local_name, namespace_url, ns}; |
13 | use std::io::{self, Write}; |
14 | |
15 | use crate::{LocalName, QualName}; |
16 | |
17 | pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> |
18 | where |
19 | Wr: Write, |
20 | T: Serialize, |
21 | { |
22 | let mut ser: HtmlSerializer = HtmlSerializer::new(writer, opts.clone()); |
23 | node.serialize(&mut ser, opts.traversal_scope) |
24 | } |
25 | |
26 | #[derive (Clone)] |
27 | pub struct SerializeOpts { |
28 | /// Is scripting enabled? Default: true |
29 | pub scripting_enabled: bool, |
30 | |
31 | /// Serialize the root node? Default: ChildrenOnly |
32 | pub traversal_scope: TraversalScope, |
33 | |
34 | /// If the serializer is asked to serialize an invalid tree, the default |
35 | /// behavior is to panic in the event that an `end_elem` is created without a |
36 | /// matching `start_elem`. Setting this to true will prevent those panics by |
37 | /// creating a default parent on the element stack. No extra start elem will |
38 | /// actually be written. Default: false |
39 | pub create_missing_parent: bool, |
40 | } |
41 | |
42 | impl Default for SerializeOpts { |
43 | fn default() -> SerializeOpts { |
44 | SerializeOpts { |
45 | scripting_enabled: true, |
46 | traversal_scope: TraversalScope::ChildrenOnly(None), |
47 | create_missing_parent: false, |
48 | } |
49 | } |
50 | } |
51 | |
52 | #[derive (Default)] |
53 | struct ElemInfo { |
54 | html_name: Option<LocalName>, |
55 | ignore_children: bool, |
56 | } |
57 | |
58 | pub struct HtmlSerializer<Wr: Write> { |
59 | pub writer: Wr, |
60 | opts: SerializeOpts, |
61 | stack: Vec<ElemInfo>, |
62 | } |
63 | |
64 | fn tagname(name: &QualName) -> LocalName { |
65 | match name.ns { |
66 | ns!(html) | ns!(mathml) | ns!(svg) => (), |
67 | ref ns: &Atom => { |
68 | // FIXME(#122) |
69 | warn!("node with weird namespace {:?}" , ns); |
70 | }, |
71 | } |
72 | |
73 | name.local.clone() |
74 | } |
75 | |
76 | impl<Wr: Write> HtmlSerializer<Wr> { |
77 | pub fn new(writer: Wr, opts: SerializeOpts) -> Self { |
78 | let html_name = match opts.traversal_scope { |
79 | TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, |
80 | TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), |
81 | }; |
82 | HtmlSerializer { |
83 | writer, |
84 | opts, |
85 | stack: vec![ElemInfo { |
86 | html_name, |
87 | ignore_children: false, |
88 | }], |
89 | } |
90 | } |
91 | |
92 | fn parent(&mut self) -> &mut ElemInfo { |
93 | if self.stack.is_empty() { |
94 | if self.opts.create_missing_parent { |
95 | warn!("ElemInfo stack empty, creating new parent" ); |
96 | self.stack.push(Default::default()); |
97 | } else { |
98 | panic!("no parent ElemInfo" ) |
99 | } |
100 | } |
101 | self.stack.last_mut().unwrap() |
102 | } |
103 | |
104 | fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> { |
105 | for c in text.chars() { |
106 | match c { |
107 | '&' => self.writer.write_all(b"&" ), |
108 | ' \u{00A0}' => self.writer.write_all(b" " ), |
109 | '"' if attr_mode => self.writer.write_all(b""" ), |
110 | '<' if !attr_mode => self.writer.write_all(b"<" ), |
111 | '>' if !attr_mode => self.writer.write_all(b">" ), |
112 | c => self.writer.write_fmt(format_args!(" {c}" )), |
113 | }?; |
114 | } |
115 | Ok(()) |
116 | } |
117 | } |
118 | |
119 | impl<Wr: Write> Serializer for HtmlSerializer<Wr> { |
120 | fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> |
121 | where |
122 | AttrIter: Iterator<Item = AttrRef<'a>>, |
123 | { |
124 | let html_name = match name.ns { |
125 | ns!(html) => Some(name.local.clone()), |
126 | _ => None, |
127 | }; |
128 | |
129 | if self.parent().ignore_children { |
130 | self.stack.push(ElemInfo { |
131 | html_name, |
132 | ignore_children: true, |
133 | }); |
134 | return Ok(()); |
135 | } |
136 | |
137 | self.writer.write_all(b"<" )?; |
138 | self.writer.write_all(tagname(&name).as_bytes())?; |
139 | for (name, value) in attrs { |
140 | self.writer.write_all(b" " )?; |
141 | |
142 | match name.ns { |
143 | ns!() => (), |
144 | ns!(xml) => self.writer.write_all(b"xml:" )?, |
145 | ns!(xmlns) => { |
146 | if name.local != local_name!("xmlns" ) { |
147 | self.writer.write_all(b"xmlns:" )?; |
148 | } |
149 | }, |
150 | ns!(xlink) => self.writer.write_all(b"xlink:" )?, |
151 | ref ns => { |
152 | // FIXME(#122) |
153 | warn!("attr with weird namespace {:?}" , ns); |
154 | self.writer.write_all(b"unknown_namespace:" )?; |
155 | }, |
156 | } |
157 | |
158 | self.writer.write_all(name.local.as_bytes())?; |
159 | self.writer.write_all(b"= \"" )?; |
160 | self.write_escaped(value, true)?; |
161 | self.writer.write_all(b" \"" )?; |
162 | } |
163 | self.writer.write_all(b">" )?; |
164 | |
165 | let ignore_children = name.ns == ns!(html) |
166 | && matches!( |
167 | name.local, |
168 | local_name!("area" ) |
169 | | local_name!("base" ) |
170 | | local_name!("basefont" ) |
171 | | local_name!("bgsound" ) |
172 | | local_name!("br" ) |
173 | | local_name!("col" ) |
174 | | local_name!("embed" ) |
175 | | local_name!("frame" ) |
176 | | local_name!("hr" ) |
177 | | local_name!("img" ) |
178 | | local_name!("input" ) |
179 | | local_name!("keygen" ) |
180 | | local_name!("link" ) |
181 | | local_name!("meta" ) |
182 | | local_name!("param" ) |
183 | | local_name!("source" ) |
184 | | local_name!("track" ) |
185 | | local_name!("wbr" ) |
186 | ); |
187 | |
188 | self.stack.push(ElemInfo { |
189 | html_name, |
190 | ignore_children, |
191 | }); |
192 | |
193 | Ok(()) |
194 | } |
195 | |
196 | fn end_elem(&mut self, name: QualName) -> io::Result<()> { |
197 | let info = match self.stack.pop() { |
198 | Some(info) => info, |
199 | None if self.opts.create_missing_parent => { |
200 | warn!("missing ElemInfo, creating default." ); |
201 | Default::default() |
202 | }, |
203 | _ => panic!("no ElemInfo" ), |
204 | }; |
205 | if info.ignore_children { |
206 | return Ok(()); |
207 | } |
208 | |
209 | self.writer.write_all(b"</" )?; |
210 | self.writer.write_all(tagname(&name).as_bytes())?; |
211 | self.writer.write_all(b">" ) |
212 | } |
213 | |
214 | fn write_text(&mut self, text: &str) -> io::Result<()> { |
215 | let escape = match self.parent().html_name { |
216 | Some(local_name!("style" )) |
217 | | Some(local_name!("script" )) |
218 | | Some(local_name!("xmp" )) |
219 | | Some(local_name!("iframe" )) |
220 | | Some(local_name!("noembed" )) |
221 | | Some(local_name!("noframes" )) |
222 | | Some(local_name!("plaintext" )) => false, |
223 | |
224 | Some(local_name!("noscript" )) => !self.opts.scripting_enabled, |
225 | |
226 | _ => true, |
227 | }; |
228 | |
229 | if escape { |
230 | self.write_escaped(text, false) |
231 | } else { |
232 | self.writer.write_all(text.as_bytes()) |
233 | } |
234 | } |
235 | |
236 | fn write_comment(&mut self, text: &str) -> io::Result<()> { |
237 | self.writer.write_all(b"<!--" )?; |
238 | self.writer.write_all(text.as_bytes())?; |
239 | self.writer.write_all(b"-->" ) |
240 | } |
241 | |
242 | fn write_doctype(&mut self, name: &str) -> io::Result<()> { |
243 | self.writer.write_all(b"<!DOCTYPE " )?; |
244 | self.writer.write_all(name.as_bytes())?; |
245 | self.writer.write_all(b">" ) |
246 | } |
247 | |
248 | fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> { |
249 | self.writer.write_all(b"<?" )?; |
250 | self.writer.write_all(target.as_bytes())?; |
251 | self.writer.write_all(b" " )?; |
252 | self.writer.write_all(data.as_bytes())?; |
253 | self.writer.write_all(b">" ) |
254 | } |
255 | } |
256 | |