| 1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
| 2 | // COPYRIGHT file at the top-level directory of this distribution. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | // option. This file may not be copied, modified, or distributed |
| 8 | // except according to those terms. |
| 9 | |
| 10 | use log::warn; |
| 11 | pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; |
| 12 | use markup5ever::{local_name, namespace_url, ns}; |
| 13 | use std::io::{self, Write}; |
| 14 | |
| 15 | use crate::{LocalName, QualName}; |
| 16 | |
| 17 | pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> |
| 18 | where |
| 19 | Wr: Write, |
| 20 | T: Serialize, |
| 21 | { |
| 22 | let mut ser: HtmlSerializer = HtmlSerializer::new(writer, opts.clone()); |
| 23 | node.serialize(&mut ser, opts.traversal_scope) |
| 24 | } |
| 25 | |
| 26 | #[derive (Clone)] |
| 27 | pub struct SerializeOpts { |
| 28 | /// Is scripting enabled? Default: true |
| 29 | pub scripting_enabled: bool, |
| 30 | |
| 31 | /// Serialize the root node? Default: ChildrenOnly |
| 32 | pub traversal_scope: TraversalScope, |
| 33 | |
| 34 | /// If the serializer is asked to serialize an invalid tree, the default |
| 35 | /// behavior is to panic in the event that an `end_elem` is created without a |
| 36 | /// matching `start_elem`. Setting this to true will prevent those panics by |
| 37 | /// creating a default parent on the element stack. No extra start elem will |
| 38 | /// actually be written. Default: false |
| 39 | pub create_missing_parent: bool, |
| 40 | } |
| 41 | |
| 42 | impl Default for SerializeOpts { |
| 43 | fn default() -> SerializeOpts { |
| 44 | SerializeOpts { |
| 45 | scripting_enabled: true, |
| 46 | traversal_scope: TraversalScope::ChildrenOnly(None), |
| 47 | create_missing_parent: false, |
| 48 | } |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | #[derive (Default)] |
| 53 | struct ElemInfo { |
| 54 | html_name: Option<LocalName>, |
| 55 | ignore_children: bool, |
| 56 | } |
| 57 | |
| 58 | pub struct HtmlSerializer<Wr: Write> { |
| 59 | pub writer: Wr, |
| 60 | opts: SerializeOpts, |
| 61 | stack: Vec<ElemInfo>, |
| 62 | } |
| 63 | |
| 64 | fn tagname(name: &QualName) -> LocalName { |
| 65 | match name.ns { |
| 66 | ns!(html) | ns!(mathml) | ns!(svg) => (), |
| 67 | ref ns: &Atom => { |
| 68 | // FIXME(#122) |
| 69 | warn!("node with weird namespace {:?}" , ns); |
| 70 | }, |
| 71 | } |
| 72 | |
| 73 | name.local.clone() |
| 74 | } |
| 75 | |
| 76 | impl<Wr: Write> HtmlSerializer<Wr> { |
| 77 | pub fn new(writer: Wr, opts: SerializeOpts) -> Self { |
| 78 | let html_name = match opts.traversal_scope { |
| 79 | TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, |
| 80 | TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), |
| 81 | }; |
| 82 | HtmlSerializer { |
| 83 | writer, |
| 84 | opts, |
| 85 | stack: vec![ElemInfo { |
| 86 | html_name, |
| 87 | ignore_children: false, |
| 88 | }], |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | fn parent(&mut self) -> &mut ElemInfo { |
| 93 | if self.stack.is_empty() { |
| 94 | if self.opts.create_missing_parent { |
| 95 | warn!("ElemInfo stack empty, creating new parent" ); |
| 96 | self.stack.push(Default::default()); |
| 97 | } else { |
| 98 | panic!("no parent ElemInfo" ) |
| 99 | } |
| 100 | } |
| 101 | self.stack.last_mut().unwrap() |
| 102 | } |
| 103 | |
| 104 | fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> { |
| 105 | for c in text.chars() { |
| 106 | match c { |
| 107 | '&' => self.writer.write_all(b"&" ), |
| 108 | ' \u{00A0}' => self.writer.write_all(b" " ), |
| 109 | '"' if attr_mode => self.writer.write_all(b""" ), |
| 110 | '<' if !attr_mode => self.writer.write_all(b"<" ), |
| 111 | '>' if !attr_mode => self.writer.write_all(b">" ), |
| 112 | c => self.writer.write_fmt(format_args!(" {c}" )), |
| 113 | }?; |
| 114 | } |
| 115 | Ok(()) |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | impl<Wr: Write> Serializer for HtmlSerializer<Wr> { |
| 120 | fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> |
| 121 | where |
| 122 | AttrIter: Iterator<Item = AttrRef<'a>>, |
| 123 | { |
| 124 | let html_name = match name.ns { |
| 125 | ns!(html) => Some(name.local.clone()), |
| 126 | _ => None, |
| 127 | }; |
| 128 | |
| 129 | if self.parent().ignore_children { |
| 130 | self.stack.push(ElemInfo { |
| 131 | html_name, |
| 132 | ignore_children: true, |
| 133 | }); |
| 134 | return Ok(()); |
| 135 | } |
| 136 | |
| 137 | self.writer.write_all(b"<" )?; |
| 138 | self.writer.write_all(tagname(&name).as_bytes())?; |
| 139 | for (name, value) in attrs { |
| 140 | self.writer.write_all(b" " )?; |
| 141 | |
| 142 | match name.ns { |
| 143 | ns!() => (), |
| 144 | ns!(xml) => self.writer.write_all(b"xml:" )?, |
| 145 | ns!(xmlns) => { |
| 146 | if name.local != local_name!("xmlns" ) { |
| 147 | self.writer.write_all(b"xmlns:" )?; |
| 148 | } |
| 149 | }, |
| 150 | ns!(xlink) => self.writer.write_all(b"xlink:" )?, |
| 151 | ref ns => { |
| 152 | // FIXME(#122) |
| 153 | warn!("attr with weird namespace {:?}" , ns); |
| 154 | self.writer.write_all(b"unknown_namespace:" )?; |
| 155 | }, |
| 156 | } |
| 157 | |
| 158 | self.writer.write_all(name.local.as_bytes())?; |
| 159 | self.writer.write_all(b"= \"" )?; |
| 160 | self.write_escaped(value, true)?; |
| 161 | self.writer.write_all(b" \"" )?; |
| 162 | } |
| 163 | self.writer.write_all(b">" )?; |
| 164 | |
| 165 | let ignore_children = name.ns == ns!(html) |
| 166 | && matches!( |
| 167 | name.local, |
| 168 | local_name!("area" ) |
| 169 | | local_name!("base" ) |
| 170 | | local_name!("basefont" ) |
| 171 | | local_name!("bgsound" ) |
| 172 | | local_name!("br" ) |
| 173 | | local_name!("col" ) |
| 174 | | local_name!("embed" ) |
| 175 | | local_name!("frame" ) |
| 176 | | local_name!("hr" ) |
| 177 | | local_name!("img" ) |
| 178 | | local_name!("input" ) |
| 179 | | local_name!("keygen" ) |
| 180 | | local_name!("link" ) |
| 181 | | local_name!("meta" ) |
| 182 | | local_name!("param" ) |
| 183 | | local_name!("source" ) |
| 184 | | local_name!("track" ) |
| 185 | | local_name!("wbr" ) |
| 186 | ); |
| 187 | |
| 188 | self.stack.push(ElemInfo { |
| 189 | html_name, |
| 190 | ignore_children, |
| 191 | }); |
| 192 | |
| 193 | Ok(()) |
| 194 | } |
| 195 | |
| 196 | fn end_elem(&mut self, name: QualName) -> io::Result<()> { |
| 197 | let info = match self.stack.pop() { |
| 198 | Some(info) => info, |
| 199 | None if self.opts.create_missing_parent => { |
| 200 | warn!("missing ElemInfo, creating default." ); |
| 201 | Default::default() |
| 202 | }, |
| 203 | _ => panic!("no ElemInfo" ), |
| 204 | }; |
| 205 | if info.ignore_children { |
| 206 | return Ok(()); |
| 207 | } |
| 208 | |
| 209 | self.writer.write_all(b"</" )?; |
| 210 | self.writer.write_all(tagname(&name).as_bytes())?; |
| 211 | self.writer.write_all(b">" ) |
| 212 | } |
| 213 | |
| 214 | fn write_text(&mut self, text: &str) -> io::Result<()> { |
| 215 | let escape = match self.parent().html_name { |
| 216 | Some(local_name!("style" )) |
| 217 | | Some(local_name!("script" )) |
| 218 | | Some(local_name!("xmp" )) |
| 219 | | Some(local_name!("iframe" )) |
| 220 | | Some(local_name!("noembed" )) |
| 221 | | Some(local_name!("noframes" )) |
| 222 | | Some(local_name!("plaintext" )) => false, |
| 223 | |
| 224 | Some(local_name!("noscript" )) => !self.opts.scripting_enabled, |
| 225 | |
| 226 | _ => true, |
| 227 | }; |
| 228 | |
| 229 | if escape { |
| 230 | self.write_escaped(text, false) |
| 231 | } else { |
| 232 | self.writer.write_all(text.as_bytes()) |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | fn write_comment(&mut self, text: &str) -> io::Result<()> { |
| 237 | self.writer.write_all(b"<!--" )?; |
| 238 | self.writer.write_all(text.as_bytes())?; |
| 239 | self.writer.write_all(b"-->" ) |
| 240 | } |
| 241 | |
| 242 | fn write_doctype(&mut self, name: &str) -> io::Result<()> { |
| 243 | self.writer.write_all(b"<!DOCTYPE " )?; |
| 244 | self.writer.write_all(name.as_bytes())?; |
| 245 | self.writer.write_all(b">" ) |
| 246 | } |
| 247 | |
| 248 | fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> { |
| 249 | self.writer.write_all(b"<?" )?; |
| 250 | self.writer.write_all(target.as_bytes())?; |
| 251 | self.writer.write_all(b" " )?; |
| 252 | self.writer.write_all(data.as_bytes())?; |
| 253 | self.writer.write_all(b">" ) |
| 254 | } |
| 255 | } |
| 256 | |