1// Copyright (C) Michael Howell and others
2// this library is released under the same terms as Rust itself.
3
4#![deny(unsafe_code)]
5#![deny(missing_docs)]
6
7//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8//! prevent cross-site scripting, layout breaking, and clickjacking caused
9//! by untrusted user-provided HTML being mixed into a larger web page.
10//!
11//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12//! so it is extremely resilient to syntactic obfuscation.
13//!
14//! Ammonia parses its input exactly according to the HTML5 specification;
15//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17//!
18//! # Examples
19//!
20//! ```
21//! let result = ammonia::clean(
22//! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23//! );
24//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25//! ```
26//!
27//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30
// The `rcdom` module is declared twice under mutually exclusive cfg flags:
// when the crate is compiled with `--cfg ammonia_unstable` it is exported
// publicly...
#[cfg(ammonia_unstable)]
pub mod rcdom;

// ...and in every other build it stays a private implementation detail.
#[cfg(not(ammonia_unstable))]
mod rcdom;
36
37use html5ever::interface::Attribute;
38use html5ever::serialize::{serialize, SerializeOpts};
39use html5ever::tree_builder::{NodeOrText, TreeSink};
40use html5ever::{driver as html, local_name, namespace_url, ns, QualName};
41use maplit::{hashmap, hashset};
42use once_cell::sync::Lazy;
43use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
44use std::borrow::{Borrow, Cow};
45use std::cmp::max;
46use std::collections::{HashMap, HashSet};
47use std::fmt;
48use std::io;
49use std::iter::IntoIterator as IntoIter;
50use std::mem;
51use std::rc::Rc;
52use std::str::FromStr;
53use tendril::stream::TendrilSink;
54use tendril::StrTendril;
55use tendril::{format_tendril, ByteTendril};
56pub use url::Url;
57
58use html5ever::buffer_queue::BufferQueue;
59use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
60pub use url;
61
// Process-wide builder holding the default configuration. It is constructed
// lazily from `Builder::default` on first access and backs the free `clean` function.
static AMMONIA: Lazy<Builder<'static>> = Lazy::new(Builder::default);
63
64/// Clean HTML with a conservative set of defaults.
65///
66/// * [tags](struct.Builder.html#defaults)
67/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
68/// * [attributes on specific tags](struct.Builder.html#defaults-2)
69/// * [attributes on all tags](struct.Builder.html#defaults-6)
70/// * [url schemes](struct.Builder.html#defaults-7)
71/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
72/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
73/// * all `class=""` settings are blocked by default
74/// * comments are stripped by default
75/// * no generic attribute prefixes are turned on by default
76/// * no specific tag-attribute-value settings are configured by default
77///
78/// [opener]: https://mathiasbynens.github.io/rel-noopener/
79/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
80///
81/// # Examples
82///
83/// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
84pub fn clean(src: &str) -> String {
85 AMMONIA.clean(src).to_string()
86}
87
88/// Turn an arbitrary string into unformatted HTML.
89///
90/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
91/// It is as strict as possible, encoding every character that has special meaning to the
92/// HTML parser.
93///
94/// # Warnings
95///
96/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
97/// you need a JavaScript or CSS escaper to do that.
98///
99/// // DO NOT DO THIS
100/// # use ammonia::clean_text;
101/// let untrusted = "Robert\"); abuse();//";
102/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
103///
104/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
105/// If you want to build an editor that works the way most folks expect them to, you should put a
106/// newline at the beginning of the tag, like this:
107///
108/// # use ammonia::{Builder, clean_text};
109/// let untrusted = "\n\nhi!";
110/// let mut b = Builder::new();
111/// b.add_tags(&["textarea"]);
112/// // This is the bad version
113/// // The user put two newlines at the beginning, but the first one was removed
114/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
115/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
116/// // This is a good version
117/// // The user put two newlines at the beginning, and we add a third one,
118/// // so the result still has two
119/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
120/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
121/// // This version is also often considered good
122/// // For many applications, leading and trailing whitespace is probably unwanted
123/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
124/// assert_eq!("<textarea>hi!</textarea>", sanitized);
125///
126/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
127/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
128/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // Reserve at least the input length; escaping can only make the text longer.
    let mut escaped = String::with_capacity(src.len().max(4));
    for ch in src.chars() {
        // `Some(entity)` for characters with special meaning to the HTML parser,
        // `None` for everything that can be copied through untouched.
        let entity = match ch {
            // starts a tag
            '<' => Some("&lt;"),
            // ends the attribute value in an unquoted attribute
            '>' => Some("&gt;"),
            // ends a double-quoted attribute value
            '\"' => Some("&quot;"),
            // ends a single-quoted attribute value
            '\'' => Some("&apos;"),
            // bogus parse error in an unquoted HTML5 attribute; in SGML/HTML it
            // ends an attribute value surrounded by backquotes
            '`' => Some("&grave;"),
            // ends an unquoted attribute
            '/' => Some("&#47;"),
            // starts an entity reference
            '&' => Some("&amp;"),
            // ignored when it appears at the beginning of an unquoted attribute
            '=' => Some("&#61;"),
            // whitespace ends an unquoted attribute
            ' ' => Some("&#32;"),
            '\t' => Some("&#9;"),
            '\n' => Some("&#10;"),
            '\x0c' => Some("&#12;"),
            '\r' => Some("&#13;"),
            // a spec-compliant browser performs this replacement anyway,
            // but the middleware might not
            '\0' => Some("&#65533;"),
            // all other characters are passed through verbatim
            _ => None,
        };
        match entity {
            Some(text) => escaped.push_str(text),
            None => escaped.push(ch),
        }
    }
    escaped
}
167
168/// Determine if a given string contains HTML
169///
170/// This function is parses the full string into HTML and checks if the input contained any
171/// HTML syntax.
172///
173/// # Note
174/// This function will return positively for strings that contain invalid HTML syntax like
175/// `<g>` and even `Vec::<u8>::new()`.
176pub fn is_html(input: &str) -> bool {
177 let santok: SanitizationTokenizer = SanitizationTokenizer::new();
178 let mut chunk: Tendril = ByteTendril::new();
179 chunk.push_slice(input.as_bytes());
180 let mut input: BufferQueue = BufferQueue::new();
181 input.push_back(buf:chunk.try_reinterpret().unwrap());
182
183 let mut tok: Tokenizer = Tokenizer::new(sink:santok, opts:Default::default());
184 let _ = tok.feed(&mut input);
185 tok.end();
186 tok.sink.was_sanitized
187}
188
/// Token sink that remembers whether it has been handed any markup token.
#[derive(Copy, Clone)]
struct SanitizationTokenizer {
    /// Starts out `false`; flipped to `true` by the sink's token handler.
    was_sanitized: bool,
}

impl SanitizationTokenizer {
    /// Creates a sink that has not seen any markup yet.
    pub fn new() -> Self {
        let was_sanitized = false;
        Self { was_sanitized }
    }
}
201
202impl TokenSink for SanitizationTokenizer {
203 type Handle = ();
204 fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
205 match token {
206 Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
207 _ => {
208 self.was_sanitized = true;
209 }
210 }
211 TokenSinkResult::Continue
212 }
213 fn end(&mut self) {}
214}
215
216/// An HTML sanitizer.
217///
218/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
219/// parsing algorithm and sanitize any disallowed tags or attributes. This
220/// algorithm also takes care of things like unclosed and (some) misnested
221/// tags.
222///
223/// # Examples
224///
225/// use ammonia::{Builder, UrlRelative};
226///
227/// let a = Builder::default()
228/// .link_rel(None)
229/// .url_relative(UrlRelative::PassThrough)
230/// .clean("<a href=/>test")
231/// .to_string();
232/// assert_eq!(
233/// a,
234/// "<a href=\"/\">test</a>");
235///
236/// # Panics
237///
238/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
239/// configured with any of these (contradictory) settings:
240///
241/// * The `rel` attribute is added to [`generic_attributes`] or the
242/// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
243///
244/// For example, this is going to panic, since [`link_rel`] is set to
245/// `Some("noopener noreferrer")` by default,
246/// and it makes no sense to simultaneously say that the user is allowed to
247/// set their own `rel` attribute while saying that every link shall be set to
248/// a particular value:
249///
250/// ```should_panic
251/// use ammonia::Builder;
252/// use maplit::hashset;
253///
254/// # fn main() {
255/// Builder::default()
256/// .generic_attributes(hashset!["rel"])
257/// .clean("");
258/// # }
259/// ```
260///
261/// This, however, is perfectly valid:
262///
263/// ```
264/// use ammonia::Builder;
265/// use maplit::hashset;
266///
267/// # fn main() {
268/// Builder::default()
269/// .generic_attributes(hashset!["rel"])
270/// .link_rel(None)
271/// .clean("");
272/// # }
273/// ```
274///
275/// * The `class` attribute is in [`allowed_classes`] and is in the
276/// corresponding [`tag_attributes`] or in [`generic_attributes`].
277///
278/// This is done both to line up with the treatment of `rel`,
279/// and to prevent people from accidentally allowing arbitrary
280/// classes on a particular element.
281///
282/// This will panic:
283///
284/// ```should_panic
285/// use ammonia::Builder;
286/// use maplit::{hashmap, hashset};
287///
288/// # fn main() {
289/// Builder::default()
290/// .generic_attributes(hashset!["class"])
291/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
292/// .clean("");
293/// # }
294/// ```
295///
296/// This, however, is perfectly valid:
297///
298/// ```
299/// use ammonia::Builder;
300/// use maplit::{hashmap, hashset};
301///
302/// # fn main() {
303/// Builder::default()
304/// .allowed_classes(hashmap!["span" => hashset!["hidden"]])
305/// .clean("");
306/// # }
307/// ```
308///
309/// * A tag is in either [`tags`] or [`tag_attributes`] while also
310/// being in [`clean_content_tags`].
311///
312/// Both [`tags`] and [`tag_attributes`] are whitelists but
313/// [`clean_content_tags`] is a blacklist, so it doesn't make sense
314/// to have the same tag in both.
315///
316/// For example, this will panic, since the `aside` tag is in
317/// [`tags`] by default:
318///
319/// ```should_panic
320/// use ammonia::Builder;
321/// use maplit::hashset;
322///
323/// # fn main() {
324/// Builder::default()
325/// .clean_content_tags(hashset!["aside"])
326/// .clean("");
327/// # }
328/// ```
329///
330/// This, however, is valid:
331///
332/// ```
333/// use ammonia::Builder;
334/// use maplit::hashset;
335///
336/// # fn main() {
337/// Builder::default()
338/// .rm_tags(&["aside"])
339/// .clean_content_tags(hashset!["aside"])
340/// .clean("");
341/// # }
342/// ```
343///
344/// [`clean`]: #method.clean
345/// [`clean_from_reader`]: #method.clean_from_reader
346/// [`generic_attributes`]: #method.generic_attributes
347/// [`tag_attributes`]: #method.tag_attributes
348/// [`generic_attributes`]: #method.generic_attributes
349/// [`link_rel`]: #method.link_rel
350/// [`allowed_classes`]: #method.allowed_classes
351/// [`id_prefix`]: #method.id_prefix
352/// [`tags`]: #method.tags
353/// [`clean_content_tags`]: #method.clean_content_tags
#[derive(Debug)]
pub struct Builder<'a> {
    /// Tags that are allowed in the output.
    tags: HashSet<&'a str>,
    /// Tags whose contents are completely removed from the output.
    clean_content_tags: HashSet<&'a str>,
    /// Map from tag name to the attribute names allowed on that tag.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    /// Map from tag name to attribute name to the attribute values allowed there.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    /// Map from tag name to attribute name to the value that is set on that tag.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    /// Attributes allowed on all tags.
    generic_attributes: HashSet<&'a str>,
    /// URL schemes; presumably the allowed ones (setter not visible here, confirm).
    url_schemes: HashSet<&'a str>,
    /// Policy for relative URLs; the default passes them through unchanged.
    url_relative: UrlRelative,
    /// Optional attribute filter hook.
    /// NOTE(review): semantics are defined outside this part of the file.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    /// `rel` value applied to links (`noopener noreferrer` by default); `None` turns it off.
    link_rel: Option<&'a str>,
    /// Map from tag name to a set of class names; presumably the classes allowed
    /// on that tag (setter not visible here, confirm).
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    /// Whether comments are stripped (`true` by default).
    strip_comments: bool,
    /// NOTE(review): presumably a prefix applied to `id` attribute values; confirm
    /// against the `id_prefix` setter, which is not visible here.
    id_prefix: Option<&'a str>,
    /// Attribute-name prefixes allowed on any tag; `None` by default.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
}
371
372impl<'a> Default for Builder<'a> {
373 fn default() -> Self {
374 #[rustfmt::skip]
375 let tags = hashset![
376 "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
377 "bdo", "blockquote", "br", "caption", "center", "cite", "code",
378 "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
379 "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
380 "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
381 "ins", "kbd", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
382 "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
383 "strike", "strong", "sub", "summary", "sup", "table", "tbody",
384 "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
385 ];
386 let clean_content_tags = hashset!["script", "style"];
387 let generic_attributes = hashset!["lang", "title"];
388 let tag_attributes = hashmap![
389 "a" => hashset![
390 "href", "hreflang"
391 ],
392 "bdo" => hashset![
393 "dir"
394 ],
395 "blockquote" => hashset![
396 "cite"
397 ],
398 "col" => hashset![
399 "align", "char", "charoff", "span"
400 ],
401 "colgroup" => hashset![
402 "align", "char", "charoff", "span"
403 ],
404 "del" => hashset![
405 "cite", "datetime"
406 ],
407 "hr" => hashset![
408 "align", "size", "width"
409 ],
410 "img" => hashset![
411 "align", "alt", "height", "src", "width"
412 ],
413 "ins" => hashset![
414 "cite", "datetime"
415 ],
416 "ol" => hashset![
417 "start"
418 ],
419 "q" => hashset![
420 "cite"
421 ],
422 "table" => hashset![
423 "align", "char", "charoff", "summary"
424 ],
425 "tbody" => hashset![
426 "align", "char", "charoff"
427 ],
428 "td" => hashset![
429 "align", "char", "charoff", "colspan", "headers", "rowspan"
430 ],
431 "tfoot" => hashset![
432 "align", "char", "charoff"
433 ],
434 "th" => hashset![
435 "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
436 ],
437 "thead" => hashset![
438 "align", "char", "charoff"
439 ],
440 "tr" => hashset![
441 "align", "char", "charoff"
442 ],
443 ];
444 let tag_attribute_values = hashmap![];
445 let set_tag_attribute_values = hashmap![];
446 let url_schemes = hashset![
447 "bitcoin",
448 "ftp",
449 "ftps",
450 "geo",
451 "http",
452 "https",
453 "im",
454 "irc",
455 "ircs",
456 "magnet",
457 "mailto",
458 "mms",
459 "mx",
460 "news",
461 "nntp",
462 "openpgp4fpr",
463 "sip",
464 "sms",
465 "smsto",
466 "ssh",
467 "tel",
468 "url",
469 "webcal",
470 "wtai",
471 "xmpp"
472 ];
473 let allowed_classes = hashmap![];
474
475 Builder {
476 tags,
477 clean_content_tags,
478 tag_attributes,
479 tag_attribute_values,
480 set_tag_attribute_values,
481 generic_attributes,
482 url_schemes,
483 url_relative: UrlRelative::PassThrough,
484 attribute_filter: None,
485 link_rel: Some("noopener noreferrer"),
486 allowed_classes,
487 strip_comments: true,
488 id_prefix: None,
489 generic_attribute_prefixes: None,
490 }
491 }
492}
493
494impl<'a> Builder<'a> {
495 /// Sets the tags that are allowed.
496 ///
497 /// # Examples
498 ///
499 /// use ammonia::Builder;
500 /// use maplit::hashset;
501 ///
502 /// # fn main() {
503 /// let tags = hashset!["my-tag"];
504 /// let a = Builder::new()
505 /// .tags(tags)
506 /// .clean("<my-tag>")
507 /// .to_string();
508 /// assert_eq!(a, "<my-tag></my-tag>");
509 /// # }
510 ///
511 /// # Defaults
512 ///
513 /// ```notest
514 /// a, abbr, acronym, area, article, aside, b, bdi,
515 /// bdo, blockquote, br, caption, center, cite, code,
516 /// col, colgroup, data, dd, del, details, dfn, div,
517 /// dl, dt, em, figcaption, figure, footer, h1, h2,
518 /// h3, h4, h5, h6, header, hgroup, hr, i, img,
    /// ins, kbd, li, map, mark, nav, ol, p, pre,
520 /// q, rp, rt, rtc, ruby, s, samp, small, span,
521 /// strike, strong, sub, summary, sup, table, tbody,
522 /// td, th, thead, time, tr, tt, u, ul, var, wbr
523 /// ```
    pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Wholesale replacement: the previous whitelist is discarded, not merged.
        self.tags = value;
        // Hand the builder back so configuration calls can be chained.
        self
    }
528
    /// Add additional whitelisted tags without overwriting old ones.
530 ///
531 /// Does nothing if the tag is already there.
532 ///
533 /// # Examples
534 ///
535 /// let a = ammonia::Builder::default()
536 /// .add_tags(&["my-tag"])
537 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
538 /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
539 pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
540 &mut self,
541 it: I,
542 ) -> &mut Self {
543 self.tags.extend(it.into_iter().map(Borrow::borrow));
544 self
545 }
546
547 /// Remove already-whitelisted tags.
548 ///
    /// Does nothing if the tags are already gone.
550 ///
551 /// # Examples
552 ///
553 /// let a = ammonia::Builder::default()
554 /// .rm_tags(&["span"])
555 /// .clean("<span></span>").to_string();
556 /// assert_eq!("", a);
557 pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
558 &mut self,
559 it: I,
560 ) -> &mut Self {
561 for i in it {
562 self.tags.remove(i.borrow());
563 }
564 self
565 }
566
567 /// Returns a copy of the set of whitelisted tags.
568 ///
569 /// # Examples
570 ///
571 /// use maplit::hashset;
572 ///
573 /// let tags = hashset!["my-tag-1", "my-tag-2"];
574 ///
575 /// let mut b = ammonia::Builder::default();
576 /// b.tags(Clone::clone(&tags));
577 /// assert_eq!(tags, b.clone_tags());
    pub fn clone_tags(&self) -> HashSet<&'a str> {
        // Only the set itself is duplicated; the tag names stay borrowed for `'a`.
        self.tags.clone()
    }
581
582 /// Sets the tags whose contents will be completely removed from the output.
583 ///
584 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
585 /// a panic.
586 ///
587 /// # Examples
588 ///
589 /// use ammonia::Builder;
590 /// use maplit::hashset;
591 ///
592 /// # fn main() {
593 /// let tag_blacklist = hashset!["script", "style"];
594 /// let a = Builder::new()
595 /// .clean_content_tags(tag_blacklist)
596 /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
597 /// .to_string();
598 /// assert_eq!(a, "");
599 /// # }
600 ///
601 /// # Defaults
602 ///
603 /// ```notest
604 /// script, style
605 /// ```
    pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Wholesale replacement of the blacklist; nothing is validated against
        // the tag whitelist at this point.
        self.clean_content_tags = value;
        // Hand the builder back so configuration calls can be chained.
        self
    }
610
    /// Add additional blacklisted clean-content tags without overwriting old ones.
612 ///
613 /// Does nothing if the tag is already there.
614 ///
615 /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
616 /// a panic.
617 ///
618 /// # Examples
619 ///
620 /// let a = ammonia::Builder::default()
621 /// .add_clean_content_tags(&["my-tag"])
622 /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
623 /// assert_eq!("<span>mess</span>", a);
624 pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
625 &mut self,
626 it: I,
627 ) -> &mut Self {
628 self.clean_content_tags
629 .extend(it.into_iter().map(Borrow::borrow));
630 self
631 }
632
633 /// Remove already-blacklisted clean-content tags.
634 ///
635 /// Does nothing if the tags aren't blacklisted.
636 ///
637 /// # Examples
638 /// use ammonia::Builder;
639 /// use maplit::hashset;
640 ///
641 /// # fn main() {
642 /// let tag_blacklist = hashset!["script"];
643 /// let a = ammonia::Builder::default()
644 /// .clean_content_tags(tag_blacklist)
645 /// .rm_clean_content_tags(&["script"])
646 /// .clean("<script>XSS</script>").to_string();
647 /// assert_eq!("XSS", a);
648 /// # }
649 pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
650 &mut self,
651 it: I,
652 ) -> &mut Self {
653 for i in it {
654 self.clean_content_tags.remove(i.borrow());
655 }
656 self
657 }
658
659 /// Returns a copy of the set of blacklisted clean-content tags.
660 ///
661 /// # Examples
662 /// # use maplit::hashset;
663 ///
664 /// let tags = hashset!["my-tag-1", "my-tag-2"];
665 ///
666 /// let mut b = ammonia::Builder::default();
667 /// b.clean_content_tags(Clone::clone(&tags));
668 /// assert_eq!(tags, b.clone_clean_content_tags());
    pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
        // Only the set itself is duplicated; the tag names stay borrowed for `'a`.
        self.clean_content_tags.clone()
    }
672
673 /// Sets the HTML attributes that are allowed on specific tags.
674 ///
675 /// The value is structured as a map from tag names to a set of attribute names.
676 ///
677 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
678 ///
679 /// # Examples
680 ///
681 /// use ammonia::Builder;
682 /// use maplit::{hashmap, hashset};
683 ///
684 /// # fn main() {
685 /// let tags = hashset!["my-tag"];
686 /// let tag_attributes = hashmap![
687 /// "my-tag" => hashset!["val"]
688 /// ];
689 /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
690 /// .clean("<my-tag val=1>")
691 /// .to_string();
692 /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
693 /// # }
694 ///
695 /// # Defaults
696 ///
697 /// ```notest
698 /// a =>
699 /// href, hreflang
700 /// bdo =>
701 /// dir
702 /// blockquote =>
703 /// cite
704 /// col =>
705 /// align, char, charoff, span
706 /// colgroup =>
707 /// align, char, charoff, span
708 /// del =>
709 /// cite, datetime
710 /// hr =>
711 /// align, size, width
712 /// img =>
713 /// align, alt, height, src, width
714 /// ins =>
715 /// cite, datetime
716 /// ol =>
717 /// start
718 /// q =>
719 /// cite
720 /// table =>
721 /// align, char, charoff, summary
722 /// tbody =>
723 /// align, char, charoff
724 /// td =>
725 /// align, char, charoff, colspan, headers, rowspan
726 /// tfoot =>
727 /// align, char, charoff
728 /// th =>
729 /// align, char, charoff, colspan, headers, rowspan, scope
730 /// thead =>
731 /// align, char, charoff
732 /// tr =>
733 /// align, char, charoff
734 /// ```
    pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
        // Wholesale replacement: every existing per-tag attribute rule is dropped.
        self.tag_attributes = value;
        // Hand the builder back so configuration calls can be chained.
        self
    }
739
    /// Add additional whitelisted tag-specific attributes without overwriting old ones.
741 ///
742 /// # Examples
743 ///
744 /// let a = ammonia::Builder::default()
745 /// .add_tags(&["my-tag"])
746 /// .add_tag_attributes("my-tag", &["my-attr"])
747 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
748 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
749 pub fn add_tag_attributes<
750 T: 'a + ?Sized + Borrow<str>,
751 U: 'a + ?Sized + Borrow<str>,
752 I: IntoIter<Item = &'a T>,
753 >(
754 &mut self,
755 tag: &'a U,
756 it: I,
757 ) -> &mut Self {
758 self.tag_attributes
759 .entry(tag.borrow())
760 .or_insert_with(HashSet::new)
761 .extend(it.into_iter().map(Borrow::borrow));
762 self
763 }
764
765 /// Remove already-whitelisted tag-specific attributes.
766 ///
767 /// Does nothing if the attribute is already gone.
768 ///
769 /// # Examples
770 ///
771 /// let a = ammonia::Builder::default()
772 /// .rm_tag_attributes("a", &["href"])
773 /// .clean("<a href=\"/\"></a>").to_string();
774 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
775 pub fn rm_tag_attributes<
776 'b,
777 'c,
778 T: 'b + ?Sized + Borrow<str>,
779 U: 'c + ?Sized + Borrow<str>,
780 I: IntoIter<Item = &'b T>,
781 >(
782 &mut self,
783 tag: &'c U,
784 it: I,
785 ) -> &mut Self {
786 if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
787 for i in it {
788 tag.remove(i.borrow());
789 }
790 }
791 self
792 }
793
794 /// Returns a copy of the set of whitelisted tag-specific attributes.
795 ///
796 /// # Examples
797 /// use maplit::{hashmap, hashset};
798 ///
799 /// let tag_attributes = hashmap![
800 /// "my-tag" => hashset!["my-attr-1", "my-attr-2"]
801 /// ];
802 ///
803 /// let mut b = ammonia::Builder::default();
804 /// b.tag_attributes(Clone::clone(&tag_attributes));
805 /// assert_eq!(tag_attributes, b.clone_tag_attributes());
    pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
        // Clones the map and its inner sets; the names stay borrowed for `'a`.
        self.tag_attributes.clone()
    }
809
810 /// Sets the values of HTML attributes that are allowed on specific tags.
811 ///
812 /// The value is structured as a map from tag names to a map from attribute names to a set of
813 /// attribute values.
814 ///
815 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
816 ///
817 /// # Examples
818 ///
819 /// use ammonia::Builder;
820 /// use maplit::{hashmap, hashset};
821 ///
822 /// # fn main() {
823 /// let tags = hashset!["my-tag"];
824 /// let tag_attribute_values = hashmap![
825 /// "my-tag" => hashmap![
826 /// "my-attr" => hashset!["val"],
827 /// ],
828 /// ];
829 /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
830 /// .clean("<my-tag my-attr=val>")
831 /// .to_string();
832 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
833 /// # }
834 ///
835 /// # Defaults
836 ///
837 /// None.
    pub fn tag_attribute_values(
        &mut self,
        value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    ) -> &mut Self {
        // Wholesale replacement: every existing attribute-value rule is dropped.
        self.tag_attribute_values = value;
        // Hand the builder back so configuration calls can be chained.
        self
    }
845
    /// Add additional whitelisted tag-specific attribute values without overwriting old ones.
847 ///
848 /// # Examples
849 ///
850 /// let a = ammonia::Builder::default()
851 /// .add_tags(&["my-tag"])
852 /// .add_tag_attribute_values("my-tag", "my-attr", &[""])
853 /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
854 /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
855 pub fn add_tag_attribute_values<
856 T: 'a + ?Sized + Borrow<str>,
857 U: 'a + ?Sized + Borrow<str>,
858 V: 'a + ?Sized + Borrow<str>,
859 I: IntoIter<Item = &'a T>,
860 >(
861 &mut self,
862 tag: &'a U,
863 attribute: &'a V,
864 it: I,
865 ) -> &mut Self {
866 self.tag_attribute_values
867 .entry(tag.borrow())
868 .or_insert_with(HashMap::new)
869 .entry(attribute.borrow())
870 .or_insert_with(HashSet::new)
871 .extend(it.into_iter().map(Borrow::borrow));
872
873 self
874 }
875
876 /// Remove already-whitelisted tag-specific attribute values.
877 ///
878 /// Does nothing if the attribute or the value is already gone.
879 ///
880 /// # Examples
881 ///
882 /// let a = ammonia::Builder::default()
883 /// .rm_tag_attributes("a", &["href"])
884 /// .add_tag_attribute_values("a", "href", &["/"])
885 /// .rm_tag_attribute_values("a", "href", &["/"])
886 /// .clean("<a href=\"/\"></a>").to_string();
887 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
888 pub fn rm_tag_attribute_values<
889 'b,
890 'c,
891 T: 'b + ?Sized + Borrow<str>,
892 U: 'c + ?Sized + Borrow<str>,
893 V: 'c + ?Sized + Borrow<str>,
894 I: IntoIter<Item = &'b T>,
895 >(
896 &mut self,
897 tag: &'c U,
898 attribute: &'c V,
899 it: I,
900 ) -> &mut Self {
901 if let Some(attrs) = self
902 .tag_attribute_values
903 .get_mut(tag.borrow())
904 .and_then(|map| map.get_mut(attribute.borrow()))
905 {
906 for i in it {
907 attrs.remove(i.borrow());
908 }
909 }
910 self
911 }
912
913 /// Returns a copy of the set of whitelisted tag-specific attribute values.
914 ///
915 /// # Examples
916 ///
917 /// use maplit::{hashmap, hashset};
918 ///
919 /// let attribute_values = hashmap![
920 /// "my-attr-1" => hashset!["foo"],
921 /// "my-attr-2" => hashset!["baz", "bar"],
922 /// ];
923 /// let tag_attribute_values = hashmap![
924 /// "my-tag" => attribute_values
925 /// ];
926 ///
927 /// let mut b = ammonia::Builder::default();
928 /// b.tag_attribute_values(Clone::clone(&tag_attribute_values));
929 /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
    pub fn clone_tag_attribute_values(
        &self,
    ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
        // Clones every nesting level; the names and values stay borrowed for `'a`.
        self.tag_attribute_values.clone()
    }
935
936 /// Sets the values of HTML attributes that are to be set on specific tags.
937 ///
938 /// The value is structured as a map from tag names to a map from attribute names to an
939 /// attribute value.
940 ///
941 /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
942 ///
943 /// # Examples
944 ///
945 /// use ammonia::Builder;
946 /// use maplit::{hashmap, hashset};
947 ///
948 /// # fn main() {
949 /// let tags = hashset!["my-tag"];
950 /// let set_tag_attribute_values = hashmap![
951 /// "my-tag" => hashmap![
952 /// "my-attr" => "val",
953 /// ],
954 /// ];
955 /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
956 /// .clean("<my-tag>")
957 /// .to_string();
958 /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
959 /// # }
960 ///
961 /// # Defaults
962 ///
963 /// None.
    pub fn set_tag_attribute_values(
        &mut self,
        value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    ) -> &mut Self {
        // Wholesale replacement: every previously configured forced value is dropped.
        self.set_tag_attribute_values = value;
        // Hand the builder back so configuration calls can be chained.
        self
    }
971
972 /// Add an attribute value to set on a specific element.
973 ///
974 /// # Examples
975 ///
976 /// let a = ammonia::Builder::default()
977 /// .add_tags(&["my-tag"])
978 /// .set_tag_attribute_value("my-tag", "my-attr", "val")
979 /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
980 /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
981 pub fn set_tag_attribute_value<
982 T: 'a + ?Sized + Borrow<str>,
983 A: 'a + ?Sized + Borrow<str>,
984 V: 'a + ?Sized + Borrow<str>,
985 >(
986 &mut self,
987 tag: &'a T,
988 attribute: &'a A,
989 value: &'a V,
990 ) -> &mut Self {
991 self.set_tag_attribute_values
992 .entry(tag.borrow())
993 .or_insert_with(HashMap::new)
994 .insert(attribute.borrow(), value.borrow());
995 self
996 }
997
998 /// Remove existing tag-specific attribute values to be set.
999 ///
1000 /// Does nothing if the attribute is already gone.
1001 ///
1002 /// # Examples
1003 ///
1004 /// let a = ammonia::Builder::default()
1005 /// // this does nothing, since no value is set for this tag attribute yet
1006 /// .rm_set_tag_attribute_value("a", "target")
1007 /// .set_tag_attribute_value("a", "target", "_blank")
1008 /// .rm_set_tag_attribute_value("a", "target")
1009 /// .clean("<a href=\"/\"></a>").to_string();
1010 /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1011 pub fn rm_set_tag_attribute_value<
1012 T: 'a + ?Sized + Borrow<str>,
1013 A: 'a + ?Sized + Borrow<str>,
1014 >(
1015 &mut self,
1016 tag: &'a T,
1017 attribute: &'a A,
1018 ) -> &mut Self {
1019 if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1020 attributes.remove(attribute.borrow());
1021 }
1022 self
1023 }
1024
1025 /// Returns the value that will be set for the attribute on the element, if any.
1026 ///
1027 /// # Examples
1028 ///
1029 /// let mut b = ammonia::Builder::default();
1030 /// b.set_tag_attribute_value("a", "target", "_blank");
1031 /// let value = b.get_set_tag_attribute_value("a", "target");
1032 /// assert_eq!(value, Some("_blank"));
1033 pub fn get_set_tag_attribute_value<
1034 T: 'a + ?Sized + Borrow<str>,
1035 A: 'a + ?Sized + Borrow<str>,
1036 >(
1037 &self,
1038 tag: &'a T,
1039 attribute: &'a A,
1040 ) -> Option<&'a str> {
1041 self.set_tag_attribute_values
1042 .get(tag.borrow())
1043 .and_then(|map| map.get(attribute.borrow()))
1044 .copied()
1045 }
1046
1047 /// Returns a copy of the set of tag-specific attribute values to be set.
1048 ///
1049 /// # Examples
1050 ///
1051 /// use maplit::{hashmap, hashset};
1052 ///
1053 /// let attribute_values = hashmap![
1054 /// "my-attr-1" => "foo",
1055 /// "my-attr-2" => "bar",
1056 /// ];
1057 /// let set_tag_attribute_values = hashmap![
1058 /// "my-tag" => attribute_values,
1059 /// ];
1060 ///
1061 /// let mut b = ammonia::Builder::default();
1062 /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
1063 /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
1064 pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
1065 self.set_tag_attribute_values.clone()
1066 }
1067
1068 /// Sets the prefix of attributes that are allowed on any tag.
1069 ///
1070 /// # Examples
1071 ///
1072 /// use ammonia::Builder;
1073 /// use maplit::hashset;
1074 ///
1075 /// # fn main() {
1076 /// let prefixes = hashset!["data-"];
1077 /// let a = Builder::new()
1078 /// .generic_attribute_prefixes(prefixes)
1079 /// .clean("<b data-val=1>")
1080 /// .to_string();
1081 /// assert_eq!(a, "<b data-val=\"1\"></b>");
1082 /// # }
1083 ///
1084 /// # Defaults
1085 ///
1086 /// No attribute prefixes are allowed by default.
1087 pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1088 self.generic_attribute_prefixes = Some(value);
1089 self
1090 }
1091
1092 /// Add additional whitelisted attribute prefix without overwriting old ones.
1093 ///
1094 /// # Examples
1095 ///
1096 /// let a = ammonia::Builder::default()
1097 /// .add_generic_attribute_prefixes(&["my-"])
1098 /// .clean("<span my-attr>mess</span>").to_string();
1099 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1100 pub fn add_generic_attribute_prefixes<
1101 T: 'a + ?Sized + Borrow<str>,
1102 I: IntoIter<Item = &'a T>,
1103 >(
1104 &mut self,
1105 it: I,
1106 ) -> &mut Self {
1107 self.generic_attribute_prefixes
1108 .get_or_insert_with(HashSet::new)
1109 .extend(it.into_iter().map(Borrow::borrow));
1110 self
1111 }
1112
1113 /// Remove already-whitelisted attribute prefixes.
1114 ///
1115 /// Does nothing if the attribute prefix is already gone.
1116 ///
1117 /// # Examples
1118 ///
1119 /// let a = ammonia::Builder::default()
1120 /// .add_generic_attribute_prefixes(&["data-", "code-"])
1121 /// .rm_generic_attribute_prefixes(&["data-"])
1122 /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1123 /// assert_eq!("<span code-test=\"foo\"></span>", a);
1124 pub fn rm_generic_attribute_prefixes<
1125 'b,
1126 T: 'b + ?Sized + Borrow<str>,
1127 I: IntoIter<Item = &'b T>,
1128 >(
1129 &mut self,
1130 it: I,
1131 ) -> &mut Self {
1132 if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1133 for i in it {
1134 let _ = prefixes.remove(i.borrow());
1135 }
1136 prefixes.is_empty()
1137 }) {
1138 self.generic_attribute_prefixes = None;
1139 }
1140 self
1141 }
1142
1143 /// Returns a copy of the set of whitelisted attribute prefixes.
1144 ///
1145 /// # Examples
1146 ///
1147 /// use maplit::hashset;
1148 ///
1149 /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
1150 ///
1151 /// let mut b = ammonia::Builder::default();
1152 /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
1153 /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
1154 pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
1155 self.generic_attribute_prefixes.clone()
1156 }
1157
1158 /// Sets the attributes that are allowed on any tag.
1159 ///
1160 /// # Examples
1161 ///
1162 /// use ammonia::Builder;
1163 /// use maplit::hashset;
1164 ///
1165 /// # fn main() {
1166 /// let attributes = hashset!["data-val"];
1167 /// let a = Builder::new()
1168 /// .generic_attributes(attributes)
1169 /// .clean("<b data-val=1>")
1170 /// .to_string();
1171 /// assert_eq!(a, "<b data-val=\"1\"></b>");
1172 /// # }
1173 ///
1174 /// # Defaults
1175 ///
1176 /// ```notest
1177 /// lang, title
1178 /// ```
1179 pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1180 self.generic_attributes = value;
1181 self
1182 }
1183
1184 /// Add additonal whitelisted attributes without overwriting old ones.
1185 ///
1186 /// # Examples
1187 ///
1188 /// let a = ammonia::Builder::default()
1189 /// .add_generic_attributes(&["my-attr"])
1190 /// .clean("<span my-attr>mess</span>").to_string();
1191 /// assert_eq!("<span my-attr=\"\">mess</span>", a);
1192 pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1193 &mut self,
1194 it: I,
1195 ) -> &mut Self {
1196 self.generic_attributes
1197 .extend(it.into_iter().map(Borrow::borrow));
1198 self
1199 }
1200
1201 /// Remove already-whitelisted attributes.
1202 ///
1203 /// Does nothing if the attribute is already gone.
1204 ///
1205 /// # Examples
1206 ///
1207 /// let a = ammonia::Builder::default()
1208 /// .rm_generic_attributes(&["title"])
1209 /// .clean("<span title=\"cool\"></span>").to_string();
1210 /// assert_eq!("<span></span>", a);
1211 pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1212 &mut self,
1213 it: I,
1214 ) -> &mut Self {
1215 for i in it {
1216 self.generic_attributes.remove(i.borrow());
1217 }
1218 self
1219 }
1220
1221 /// Returns a copy of the set of whitelisted attributes.
1222 ///
1223 /// # Examples
1224 ///
1225 /// use maplit::hashset;
1226 ///
1227 /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
1228 ///
1229 /// let mut b = ammonia::Builder::default();
1230 /// b.generic_attributes(Clone::clone(&generic_attributes));
1231 /// assert_eq!(generic_attributes, b.clone_generic_attributes());
1232 pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
1233 self.generic_attributes.clone()
1234 }
1235
1236 /// Sets the URL schemes permitted on `href` and `src` attributes.
1237 ///
1238 /// # Examples
1239 ///
1240 /// use ammonia::Builder;
1241 /// use maplit::hashset;
1242 ///
1243 /// # fn main() {
1244 /// let url_schemes = hashset![
1245 /// "http", "https", "mailto", "magnet"
1246 /// ];
1247 /// let a = Builder::new().url_schemes(url_schemes)
1248 /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
1249 /// .to_string();
1250 ///
1251 /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
1252 /// // in the cleaned HTML.
1253 /// assert_eq!(a,
1254 /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&amp;xl=0&amp;dn=zero_len.fil&amp;xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&amp;xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
1255 /// # }
1256 ///
1257 /// # Defaults
1258 ///
1259 /// ```notest
1260 /// bitcoin, ftp, ftps, geo, http, https, im, irc,
1261 /// ircs, magnet, mailto, mms, mx, news, nntp,
1262 /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
1263 /// webcal, wtai, xmpp
1264 /// ```
1265 pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
1266 self.url_schemes = value;
1267 self
1268 }
1269
1270 /// Add additonal whitelisted URL schemes without overwriting old ones.
1271 ///
1272 /// # Examples
1273 ///
1274 /// let a = ammonia::Builder::default()
1275 /// .add_url_schemes(&["my-scheme"])
1276 /// .clean("<a href=my-scheme:home>mess</span>").to_string();
1277 /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1278 pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1279 &mut self,
1280 it: I,
1281 ) -> &mut Self {
1282 self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1283 self
1284 }
1285
1286 /// Remove already-whitelisted attributes.
1287 ///
1288 /// Does nothing if the attribute is already gone.
1289 ///
1290 /// # Examples
1291 ///
1292 /// let a = ammonia::Builder::default()
1293 /// .rm_url_schemes(&["ftp"])
1294 /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1295 /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1296 pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1297 &mut self,
1298 it: I,
1299 ) -> &mut Self {
1300 for i in it {
1301 self.url_schemes.remove(i.borrow());
1302 }
1303 self
1304 }
1305
1306 /// Returns a copy of the set of whitelisted URL schemes.
1307 ///
1308 /// # Examples
1309 /// use maplit::hashset;
1310 ///
1311 /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
1312 ///
1313 /// let mut b = ammonia::Builder::default();
1314 /// b.url_schemes(Clone::clone(&url_schemes));
1315 /// assert_eq!(url_schemes, b.clone_url_schemes());
1316 pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
1317 self.url_schemes.clone()
1318 }
1319
1320 /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
1321 ///
1322 /// # Examples
1323 ///
1324 /// use ammonia::{Builder, UrlRelative};
1325 ///
1326 /// let a = Builder::new().url_relative(UrlRelative::PassThrough)
1327 /// .clean("<a href=/>Home</a>")
1328 /// .to_string();
1329 ///
1330 /// // See `link_rel` for information on the rel="noopener noreferrer" attribute
1331 /// // in the cleaned HTML.
1332 /// assert_eq!(
1333 /// a,
1334 /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
1335 ///
1336 /// # Defaults
1337 ///
1338 /// ```notest
1339 /// UrlRelative::PassThrough
1340 /// ```
1341 pub fn url_relative(&mut self, value: UrlRelative) -> &mut Self {
1342 self.url_relative = value;
1343 self
1344 }
1345
1346 /// Allows rewriting of all attributes using a callback.
1347 ///
1348 /// The callback takes name of the element, attribute and its value.
1349 /// Returns `None` to remove the attribute, or a value to use.
1350 ///
1351 /// Rewriting of attributes with URLs is done before `url_relative()`.
1352 ///
1353 /// # Panics
1354 ///
1355 /// If more than one callback is set.
1356 ///
1357 /// # Examples
1358 ///
1359 /// ```rust
1360 /// use ammonia::Builder;
1361 /// let a = Builder::new()
1362 /// .attribute_filter(|element, attribute, value| {
1363 /// match (element, attribute) {
1364 /// ("img", "src") => None,
1365 /// _ => Some(value.into())
1366 /// }
1367 /// })
1368 /// .link_rel(None)
1369 /// .clean("<a href=/><img alt=Home src=foo></a>")
1370 /// .to_string();
1371 /// assert_eq!(a,
1372 /// r#"<a href="/"><img alt="Home"></a>"#);
1373 /// ```
1374 pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1375 where
1376 CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1377 {
1378 assert!(
1379 self.attribute_filter.is_none(),
1380 "attribute_filter can be set only once"
1381 );
1382 self.attribute_filter = Some(Box::new(callback));
1383 self
1384 }
1385
1386 /// Returns `true` if the relative URL resolver is set to `Deny`.
1387 ///
1388 /// # Examples
1389 ///
1390 /// use ammonia::{Builder, UrlRelative};
1391 /// let mut a = Builder::default();
1392 /// a.url_relative(UrlRelative::Deny);
1393 /// assert!(a.is_url_relative_deny());
1394 /// a.url_relative(UrlRelative::PassThrough);
1395 /// assert!(!a.is_url_relative_deny());
1396 pub fn is_url_relative_deny(&self) -> bool {
1397 matches!(self.url_relative, UrlRelative::Deny)
1398 }
1399
1400 /// Returns `true` if the relative URL resolver is set to `PassThrough`.
1401 ///
1402 /// # Examples
1403 ///
1404 /// use ammonia::{Builder, UrlRelative};
1405 /// let mut a = Builder::default();
1406 /// a.url_relative(UrlRelative::Deny);
1407 /// assert!(!a.is_url_relative_pass_through());
1408 /// a.url_relative(UrlRelative::PassThrough);
1409 /// assert!(a.is_url_relative_pass_through());
1410 pub fn is_url_relative_pass_through(&self) -> bool {
1411 matches!(self.url_relative, UrlRelative::PassThrough)
1412 }
1413
1414 /// Returns `true` if the relative URL resolver is set to `Custom`.
1415 ///
1416 /// # Examples
1417 ///
1418 /// use ammonia::{Builder, UrlRelative};
1419 /// use std::borrow::Cow;
1420 /// fn test(a: &str) -> Option<Cow<str>> { None }
1421 /// # fn main() {
1422 /// let mut a = Builder::default();
1423 /// a.url_relative(UrlRelative::Custom(Box::new(test)));
1424 /// assert!(a.is_url_relative_custom());
1425 /// a.url_relative(UrlRelative::PassThrough);
1426 /// assert!(!a.is_url_relative_custom());
1427 /// a.url_relative(UrlRelative::Deny);
1428 /// assert!(!a.is_url_relative_custom());
1429 /// # }
1430 pub fn is_url_relative_custom(&self) -> bool {
1431 matches!(self.url_relative, UrlRelative::Custom(_))
1432 }
1433
1434 /// Configures a `rel` attribute that will be added on links.
1435 ///
1436 /// If `rel` is in the generic or tag attributes, this must be set to `None`.
1437 /// Common `rel` values to include:
1438 ///
1439 /// * `noopener`: This prevents [a particular type of XSS attack],
1440 /// and should usually be turned on for untrusted HTML.
1441 /// * `noreferrer`: This prevents the browser from [sending the source URL]
1442 /// to the website that is linked to.
1443 /// * `nofollow`: This prevents search engines from [using this link for
1444 /// ranking], which disincentivizes spammers.
1445 ///
1446 /// To turn on rel-insertion, call this function with a space-separated list.
1447 /// Ammonia does not parse rel-attributes;
1448 /// it just puts the given string into the attribute directly.
1449 ///
1450 /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
1451 /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
1452 /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
1453 ///
1454 /// # Examples
1455 ///
1456 /// use ammonia::Builder;
1457 ///
1458 /// let a = Builder::new().link_rel(None)
1459 /// .clean("<a href=https://rust-lang.org/>Rust</a>")
1460 /// .to_string();
1461 /// assert_eq!(
1462 /// a,
1463 /// "<a href=\"https://rust-lang.org/\">Rust</a>");
1464 ///
1465 /// # Defaults
1466 ///
1467 /// ```notest
1468 /// Some("noopener noreferrer")
1469 /// ```
1470 pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
1471 self.link_rel = value;
1472 self
1473 }
1474
1475 /// Returns the settings for links' `rel` attribute, if one is set.
1476 ///
1477 /// # Examples
1478 ///
1479 /// use ammonia::{Builder, UrlRelative};
1480 /// let mut a = Builder::default();
1481 /// a.link_rel(Some("a b"));
1482 /// assert_eq!(a.get_link_rel(), Some("a b"));
1483 pub fn get_link_rel(&self) -> Option<&str> {
1484 self.link_rel
1485 }
1486
1487 /// Sets the CSS classes that are allowed on specific tags.
1488 ///
1489 /// The values is structured as a map from tag names to a set of class names.
1490 ///
1491 /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
1492 /// this map will cause a panic.
1493 ///
1494 /// # Examples
1495 ///
1496 /// use ammonia::Builder;
1497 /// use maplit::{hashmap, hashset};
1498 ///
1499 /// # fn main() {
1500 /// let allowed_classes = hashmap![
1501 /// "code" => hashset!["rs", "ex", "c", "cxx", "js"]
1502 /// ];
1503 /// let a = Builder::new()
1504 /// .allowed_classes(allowed_classes)
1505 /// .clean("<code class=rs>fn main() {}</code>")
1506 /// .to_string();
1507 /// assert_eq!(
1508 /// a,
1509 /// "<code class=\"rs\">fn main() {}</code>");
1510 /// # }
1511 ///
1512 /// # Defaults
1513 ///
1514 /// The set of allowed classes is empty by default.
1515 pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
1516 self.allowed_classes = value;
1517 self
1518 }
1519
1520 /// Add additonal whitelisted classes without overwriting old ones.
1521 ///
1522 /// # Examples
1523 ///
1524 /// let a = ammonia::Builder::default()
1525 /// .add_allowed_classes("a", &["onebox"])
1526 /// .clean("<a href=/ class=onebox>mess</span>").to_string();
1527 /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1528 pub fn add_allowed_classes<
1529 T: 'a + ?Sized + Borrow<str>,
1530 U: 'a + ?Sized + Borrow<str>,
1531 I: IntoIter<Item = &'a T>,
1532 >(
1533 &mut self,
1534 tag: &'a U,
1535 it: I,
1536 ) -> &mut Self {
1537 self.allowed_classes
1538 .entry(tag.borrow())
1539 .or_insert_with(HashSet::new)
1540 .extend(it.into_iter().map(Borrow::borrow));
1541 self
1542 }
1543
1544 /// Remove already-whitelisted attributes.
1545 ///
1546 /// Does nothing if the attribute is already gone.
1547 ///
1548 /// # Examples
1549 ///
1550 /// let a = ammonia::Builder::default()
1551 /// .add_allowed_classes("span", &["active"])
1552 /// .rm_allowed_classes("span", &["active"])
1553 /// .clean("<span class=active>").to_string();
1554 /// assert_eq!("<span class=\"\"></span>", a);
1555 pub fn rm_allowed_classes<
1556 'b,
1557 'c,
1558 T: 'b + ?Sized + Borrow<str>,
1559 U: 'c + ?Sized + Borrow<str>,
1560 I: IntoIter<Item = &'b T>,
1561 >(
1562 &mut self,
1563 tag: &'c U,
1564 it: I,
1565 ) -> &mut Self {
1566 if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1567 for i in it {
1568 tag.remove(i.borrow());
1569 }
1570 }
1571 self
1572 }
1573
1574 /// Returns a copy of the set of whitelisted class attributes.
1575 ///
1576 /// # Examples
1577 ///
1578 /// use maplit::{hashmap, hashset};
1579 ///
1580 /// let allowed_classes = hashmap![
1581 /// "my-tag" => hashset!["my-class-1", "my-class-2"]
1582 /// ];
1583 ///
1584 /// let mut b = ammonia::Builder::default();
1585 /// b.allowed_classes(Clone::clone(&allowed_classes));
1586 /// assert_eq!(allowed_classes, b.clone_allowed_classes());
1587 pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
1588 self.allowed_classes.clone()
1589 }
1590
1591 /// Configures the handling of HTML comments.
1592 ///
1593 /// If this option is false, comments will be preserved.
1594 ///
1595 /// # Examples
1596 ///
1597 /// use ammonia::Builder;
1598 ///
1599 /// let a = Builder::new().strip_comments(false)
1600 /// .clean("<!-- yes -->")
1601 /// .to_string();
1602 /// assert_eq!(
1603 /// a,
1604 /// "<!-- yes -->");
1605 ///
1606 /// # Defaults
1607 ///
1608 /// `true`
1609 pub fn strip_comments(&mut self, value: bool) -> &mut Self {
1610 self.strip_comments = value;
1611 self
1612 }
1613
1614 /// Returns `true` if comment stripping is turned on.
1615 ///
1616 /// # Examples
1617 ///
1618 /// let mut a = ammonia::Builder::new();
1619 /// a.strip_comments(true);
1620 /// assert!(a.will_strip_comments());
1621 /// a.strip_comments(false);
1622 /// assert!(!a.will_strip_comments());
1623 pub fn will_strip_comments(&self) -> bool {
1624 self.strip_comments
1625 }
1626
1627 /// Prefixes all "id" attribute values with a given string. Note that the tag and
1628 /// attribute themselves must still be whitelisted.
1629 ///
1630 /// # Examples
1631 ///
1632 /// use ammonia::Builder;
1633 /// use maplit::hashset;
1634 ///
1635 /// # fn main() {
1636 /// let attributes = hashset!["id"];
1637 /// let a = Builder::new()
1638 /// .generic_attributes(attributes)
1639 /// .id_prefix(Some("safe-"))
1640 /// .clean("<b id=42>")
1641 /// .to_string();
1642 /// assert_eq!(a, "<b id=\"safe-42\"></b>");
1643 /// # }
1644
1645 ///
1646 /// # Defaults
1647 ///
1648 /// `None`
1649 pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
1650 self.id_prefix = value;
1651 self
1652 }
1653
1654 /// Constructs a [`Builder`] instance configured with the [default options].
1655 ///
1656 /// # Examples
1657 ///
1658 /// use ammonia::{Builder, Url, UrlRelative};
1659 /// # use std::error::Error;
1660 ///
1661 /// # fn do_main() -> Result<(), Box<Error>> {
1662 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1663 /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1664 ///
1665 /// let result = Builder::new() // <--
1666 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1667 /// .clean(input)
1668 /// .to_string();
1669 /// assert_eq!(result, output);
1670 /// # Ok(())
1671 /// # }
1672 /// # fn main() { do_main().unwrap() }
1673 ///
1674 /// [default options]: fn.clean.html
1675 /// [`Builder`]: struct.Builder.html
1676 pub fn new() -> Self {
1677 Self::default()
1678 }
1679
1680 /// Constructs a [`Builder`] instance configured with no allowed tags.
1681 ///
1682 /// # Examples
1683 ///
1684 /// use ammonia::{Builder, Url, UrlRelative};
1685 /// # use std::error::Error;
1686 ///
1687 /// # fn do_main() -> Result<(), Box<Error>> {
1688 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
1689 /// let output = "This is an Ammonia example using the empty() function.";
1690 ///
1691 /// let result = Builder::empty() // <--
1692 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1693 /// .clean(input)
1694 /// .to_string();
1695 /// assert_eq!(result, output);
1696 /// # Ok(())
1697 /// # }
1698 /// # fn main() { do_main().unwrap() }
1699 ///
1700 /// [default options]: fn.clean.html
1701 /// [`Builder`]: struct.Builder.html
1702 pub fn empty() -> Self {
1703 Self {
1704 tags: hashset![],
1705 ..Self::default()
1706 }
1707 }
1708
1709 /// Sanitizes an HTML fragment in a string according to the configured options.
1710 ///
1711 /// # Examples
1712 ///
1713 /// use ammonia::{Builder, Url, UrlRelative};
1714 /// # use std::error::Error;
1715 ///
1716 /// # fn do_main() -> Result<(), Box<Error>> {
1717 /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1718 /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1719 ///
1720 /// let result = Builder::new()
1721 /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1722 /// .clean(input)
1723 /// .to_string(); // <--
1724 /// assert_eq!(result, output);
1725 /// # Ok(())
1726 /// # }
1727 /// # fn main() { do_main().unwrap() }
1728 pub fn clean(&self, src: &str) -> Document {
1729 let parser = Self::make_parser();
1730 let dom = parser.one(src);
1731 self.clean_dom(dom)
1732 }
1733
1734 /// Sanitizes an HTML fragment from a reader according to the configured options.
1735 ///
1736 /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1737 /// like when using [`String::from_utf8_lossy`].
1738 ///
1739 /// To avoid consuming the reader, a mutable reference can be passed to this method.
1740 ///
1741 /// # Examples
1742 ///
1743 /// use ammonia::Builder;
1744 /// # use std::error::Error;
1745 ///
1746 /// # fn do_main() -> Result<(), Box<Error>> {
1747 /// let a = Builder::new()
1748 /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1749 /// .to_string();
1750 /// assert_eq!(a, "");
1751 /// # Ok(()) }
1752 /// # fn main() { do_main().unwrap() }
1753 ///
1754 /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1755 pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1756 where
1757 R: io::Read,
1758 {
1759 let parser = Self::make_parser().from_utf8();
1760 let dom = parser.read_from(&mut src)?;
1761 Ok(self.clean_dom(dom))
1762 }
1763
1764 /// Clean a post-parsing DOM.
1765 ///
1766 /// This is not a public API because RcDom isn't really stable.
1767 /// We want to be able to take breaking changes to html5ever itself
1768 /// without having to break Ammonia's API.
1769 fn clean_dom(&self, mut dom: RcDom) -> Document {
1770 let mut stack = Vec::new();
1771 let mut removed = Vec::new();
1772 let link_rel = self
1773 .link_rel
1774 .map(|link_rel| format_tendril!("{}", link_rel));
1775 if link_rel.is_some() {
1776 assert!(self.generic_attributes.get("rel").is_none());
1777 assert!(self
1778 .tag_attributes
1779 .get("a")
1780 .and_then(|a| a.get("rel"))
1781 .is_none());
1782 }
1783 assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
1784 for tag_name in self.allowed_classes.keys() {
1785 assert!(self
1786 .tag_attributes
1787 .get(tag_name)
1788 .and_then(|a| a.get("class"))
1789 .is_none());
1790 }
1791 for tag_name in &self.clean_content_tags {
1792 assert!(!self.tags.contains(tag_name));
1793 assert!(!self.tag_attributes.contains_key(tag_name));
1794 }
1795 let body = {
1796 let children = dom.document.children.borrow();
1797 children[0].clone()
1798 };
1799 stack.extend(
1800 mem::take(&mut *body.children.borrow_mut())
1801 .into_iter()
1802 .rev(),
1803 );
1804 // This design approach is used to prevent pathological content from producing
1805 // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
1806 // of course, contains nodes that need to be dropped (we can't just drop them,
1807 // because they could have a very deep child tree).
1808 while let Some(mut node) = stack.pop() {
1809 let parent = node.parent
1810 .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
1811 .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
1812 if self.clean_node_content(&node) {
1813 removed.push(node);
1814 continue;
1815 }
1816 let pass_clean = self.clean_child(&mut node);
1817 let pass = pass_clean && self.check_expected_namespace(&parent, &node);
1818 if pass {
1819 self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
1820 dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
1821 } else {
1822 for sub in node.children.borrow_mut().iter_mut() {
1823 sub.parent.replace(Some(Rc::downgrade(&parent)));
1824 }
1825 }
1826 stack.extend(
1827 mem::take(&mut *node.children.borrow_mut())
1828 .into_iter()
1829 .rev(),
1830 );
1831 if !pass {
1832 removed.push(node);
1833 }
1834 }
1835 // Now, imperatively clean up all of the child nodes.
1836 // Otherwise, we could wind up with a DoS, either caused by a memory leak,
1837 // or caused by a stack overflow.
1838 while let Some(node) = removed.pop() {
1839 removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
1840 }
1841 Document(dom)
1842 }
1843
1844 /// Returns `true` if a node and all its content should be removed.
1845 fn clean_node_content(&self, node: &Handle) -> bool {
1846 match node.data {
1847 NodeData::Text { .. }
1848 | NodeData::Comment { .. }
1849 | NodeData::Doctype { .. }
1850 | NodeData::Document
1851 | NodeData::ProcessingInstruction { .. } => false,
1852 NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1853 }
1854 }
1855
1856 /// Remove unwanted attributes, and check if the node should be kept or not.
1857 ///
1858 /// The root node doesn't need cleaning because we create the root node ourselves,
1859 /// and it doesn't get serialized, and ... it just exists to give the parser
1860 /// a context (in this case, a div-like block context).
1861 fn clean_child(&self, child: &mut Handle) -> bool {
1862 match child.data {
1863 NodeData::Text { .. } => true,
1864 NodeData::Comment { .. } => !self.strip_comments,
1865 NodeData::Doctype { .. }
1866 | NodeData::Document
1867 | NodeData::ProcessingInstruction { .. } => false,
1868 NodeData::Element {
1869 ref name,
1870 ref attrs,
1871 ..
1872 } => {
1873 if self.tags.contains(&*name.local) {
1874 let attr_filter = |attr: &html5ever::Attribute| {
1875 let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1876 || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1877 prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1878 }) == Some(true)
1879 || self
1880 .tag_attributes
1881 .get(&*name.local)
1882 .map(|ta| ta.contains(&*attr.name.local))
1883 == Some(true)
1884 || self
1885 .tag_attribute_values
1886 .get(&*name.local)
1887 .and_then(|tav| tav.get(&*attr.name.local))
1888 .map(|vs| {
1889 let attr_val = attr.value.to_lowercase();
1890 vs.iter().any(|v| v.to_lowercase() == attr_val)
1891 })
1892 == Some(true);
1893 if !whitelisted {
1894 // If the class attribute is not whitelisted,
1895 // but there is a whitelisted set of allowed_classes,
1896 // do not strip out the class attribute.
1897 // Banned classes will be filtered later.
1898 &*attr.name.local == "class"
1899 && self.allowed_classes.contains_key(&*name.local)
1900 } else if is_url_attr(&*name.local, &*attr.name.local) {
1901 let url = Url::parse(&*attr.value);
1902 if let Ok(url) = url {
1903 self.url_schemes.contains(url.scheme())
1904 } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1905 !matches!(self.url_relative, UrlRelative::Deny)
1906 } else {
1907 false
1908 }
1909 } else {
1910 true
1911 }
1912 };
1913 attrs.borrow_mut().retain(attr_filter);
1914 true
1915 } else {
1916 false
1917 }
1918 }
1919 }
1920 }
1921
    // Check for unexpected namespace changes.
    //
    // The issue happens if developers added to the list of allowed tags any
    // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
    // that is:
    //
    // * title
    // * textarea
    // * xmp
    // * iframe
    // * noembed
    // * noframes
    // * plaintext
    // * noscript
    // * style
    // * script
    //
    // An example in the wild is Plume, which allows iframe [1]. So in the
    // following examples I'll assume this policy:
    //
    //     Builder::new()
    //         .add_tags(&["iframe"])
    //
    // In the HTML namespace `<iframe>` is parsed specially; that is, its
    // content is treated as text. For instance, the following html:
    //
    //     <iframe><a>test
    //
    // is parsed into the following DOM tree:
    //
    //     iframe
    //     └─ #text: <a>test
    //
    // So iframe cannot have any children other than a text node.
    //
    // The same is not true, though, in "foreign content"; that is, within
    // <svg> or <math> tags. The following html:
    //
    //     <svg><iframe><a>test
    //
    // is parsed differently:
    //
    //     svg
    //     └─ iframe
    //        └─ a
    //           └─ #text: test
    //
    // So in the SVG namespace iframe can have children.
    //
    // Ammonia disallows <svg>, but it keeps its content after deleting it. And
    // the parser internally keeps track of the namespace of the element. So
    // assume we have the following snippet:
    //
    //     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
    //
    // It is parsed into:
    //
    //     svg
    //     └─ iframe
    //        └─ a title="</iframe><img src onerror=alert(1)>"
    //           └─ #text: test
    //
    // This DOM tree is harmless from ammonia's point of view because the piece
    // of code that looks like XSS is in a title attribute. Hence, the
    // resulting "safe" HTML from ammonia would be:
    //
    //     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
    //     noreferrer">test</a></iframe>
    //
    // However, at this point, the information about the namespace is lost,
    // which means that the browser will parse this snippet into:
    //
    //     ├─ iframe
    //     │  └─ #text: <a title="
    //     ├─ img src="" onerror="alert(1)"
    //     └─ #text: " rel="noopener noreferrer">test
    //
    // Leading to XSS.
    //
    // To solve this issue, check for unexpected namespace switches after cleanup.
    // Elements which change namespace at an unexpected point are removed.
    // This function returns `true` if `child` should be kept, and `false` if it
    // should be removed.
    //
    // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
    fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
        // Only a parent/child pair in which both nodes are elements is checked;
        // every other pair is kept unconditionally.
        let (parent, child) = match (&parent.data, &child.data) {
            (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
            _ => return true,
        };
        // The branches below are tested in order, so each later branch only
        // sees the pairs that no earlier branch claimed.
        //
        // The only way to switch from html to svg is with the <svg> tag
        if parent.ns == ns!(html) && child.ns == ns!(svg) {
            child.local == local_name!("svg")
        // The only way to switch from html to mathml is with the <math> tag
        } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
            child.local == local_name!("math")
        // The only way to switch from mathml to svg/html is with a text integration point
        } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
            // https://html.spec.whatwg.org/#mathml
            matches!(
                &*parent.local,
                "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
            )
        // The only way to switch from svg to mathml/html is with an html integration point
        } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
            // https://html.spec.whatwg.org/#svg-0
            matches!(&*parent.local, "foreignObject")
        // An SVG child that reaches this point must carry a known SVG element name.
        } else if child.ns == ns!(svg) {
            is_svg_tag(&*child.local)
        // A MathML child that reaches this point must carry a known MathML element name.
        } else if child.ns == ns!(mathml) {
            is_mathml_tag(&*child.local)
        // An HTML child is rejected when its name is one of the SVG/MathML
        // element names, except for the names listed here, which are always
        // accepted.
        } else if child.ns == ns!(html) {
            (!is_svg_tag(&*child.local) && !is_mathml_tag(&*child.local))
                || matches!(
                    &*child.local,
                    "title" | "style" | "font" | "a" | "script" | "span"
                )
        } else {
            // There are no other supported ways to switch namespace
            parent.ns == child.ns
        }
    }
2044
    /// Add and transform special-cased attributes and elements.
    ///
    /// Nothing happens unless `child` is an element. For an element, the steps below run
    /// in this order, each one seeing the attributes as left by the previous step:
    ///
    /// * forcing the attribute values configured in `set_tag_attribute_values`
    /// * adding `<a rel>` attributes (when `link_rel` is `Some`)
    /// * prefixing `id` attribute values with `id_prefix` (when it is `Some`)
    /// * running the configured `attribute_filter`, if any
    /// * relative URL rewriting
    /// * filtering out banned classes
    fn adjust_node_attributes(
        &self,
        child: &mut Handle,
        link_rel: &Option<StrTendril>,
        id_prefix: Option<&'a str>,
    ) {
        if let NodeData::Element {
            ref name,
            ref attrs,
            ..
        } = child.data
        {
            // Step 1: attributes whose value is fixed by configuration for this tag.
            if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
                let mut attrs = attrs.borrow_mut();
                for (&set_name, &set_value) in set_attrs {
                    // set the value of the attribute if the attribute is already present
                    if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
                    {
                        if &*attr.value != set_value {
                            attr.value = set_value.into();
                        }
                    } else {
                        // otherwise, add the attribute
                        let attr = Attribute {
                            name: QualName::new(None, ns!(), set_name.into()),
                            value: set_value.into(),
                        };
                        attrs.push(attr);
                    }
                }
            }
            // Step 2: append a `rel` attribute to every `<a>` element.
            // NOTE(review): this pushes without looking for an existing `rel`;
            // presumably any user-supplied one was removed before this point - confirm.
            if let Some(ref link_rel) = *link_rel {
                if &*name.local == "a" {
                    attrs.borrow_mut().push(Attribute {
                        name: QualName::new(None, ns!(), local_name!("rel")),
                        value: link_rel.clone(),
                    })
                }
            }
            // Step 3: prepend the prefix to `id` values that do not already start with it.
            if let Some(ref id_prefix) = id_prefix {
                for attr in &mut *attrs.borrow_mut() {
                    if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
                        attr.value = format_tendril!("{}{}", id_prefix, attr.value);
                    }
                }
            }
            // Step 4: the attribute filter. `None` drops the attribute, `Some` keeps it
            // and replaces the value when the returned text differs.
            if let Some(ref attr_filter) = self.attribute_filter {
                let mut drop_attrs = Vec::new();
                let mut attrs = attrs.borrow_mut();
                for (i, attr) in &mut attrs.iter_mut().enumerate() {
                    let replace_with = if let Some(new) =
                        attr_filter.filter(&*name.local, &*attr.name.local, &*attr.value)
                    {
                        if *new != *attr.value {
                            Some(format_tendril!("{}", new))
                        } else {
                            None // no need to replace the attr if filter returned the same value
                        }
                    } else {
                        drop_attrs.push(i);
                        None
                    };
                    if let Some(replace_with) = replace_with {
                        attr.value = replace_with;
                    }
                }
                // Indices were collected in ascending order; removing from the back keeps
                // the not-yet-removed indices valid (see the longer note in step 5).
                for i in drop_attrs.into_iter().rev() {
                    attrs.swap_remove(i);
                }
            }
            // Step 5: apply the relative-URL policy to URL-carrying attributes whose value
            // is a relative URL. A `None` result from the policy drops the attribute.
            {
                let mut drop_attrs = Vec::new();
                let mut attrs = attrs.borrow_mut();
                for (i, attr) in attrs.iter_mut().enumerate() {
                    if is_url_attr(&*name.local, &*attr.name.local) && is_url_relative(&*attr.value)
                    {
                        let new_value = self.url_relative.evaluate(&*attr.value);
                        if let Some(new_value) = new_value {
                            attr.value = new_value;
                        } else {
                            drop_attrs.push(i);
                        }
                    }
                }
                // Swap remove scrambles the vector after the current point.
                // We will not do anything except with items before the current point.
                // The `rev()` is, as such, necessary for correctness.
                // We could use regular `remove(usize)` and a forward iterator,
                // but that's slower.
                for i in drop_attrs.into_iter().rev() {
                    attrs.swap_remove(i);
                }
            }
            // Step 6: when this tag has a class allow-list, rewrite each `class` attribute
            // to the allowed classes only, joined by single spaces. The attribute itself is
            // kept even when no class survives (its value becomes empty).
            if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
                for attr in &mut *attrs.borrow_mut() {
                    if &attr.name.local == "class" {
                        let mut classes = vec![];
                        // https://html.spec.whatwg.org/#global-attributes:classes-2
                        for class in attr.value.split_ascii_whitespace() {
                            if allowed_values.contains(class) {
                                classes.push(class.to_owned());
                            }
                        }
                        attr.value = format_tendril!("{}", classes.join(" "));
                    }
                }
            }
        }
    }
2161
2162 /// Initializes an HTML fragment parser.
2163 ///
2164 /// Ammonia conforms to the HTML5 fragment parsing rules,
2165 /// by parsing the given fragment as if it were included in a <div> tag.
2166 fn make_parser() -> html::Parser<RcDom> {
2167 html::parse_fragment(
2168 RcDom::default(),
2169 html::ParseOpts::default(),
2170 QualName::new(None, ns!(html), local_name!("div")),
2171 vec![],
2172 )
2173 }
2174}
2175
/// Tells whether attribute `attr` on element `element` holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only count on the
/// specific elements listed below. Names are compared exactly (case-sensitively).
fn is_url_attr(element: &str, attr: &str) -> bool {
    match attr {
        "href" | "src" => true,
        "action" => element == "form",
        "data" => element == "object",
        "formaction" => matches!(element, "button" | "input"),
        "ping" => element == "a",
        "poster" => element == "video",
        _ => false,
    }
}
2186
/// Returns `true` when `element` is one of the SVG element names listed below.
///
/// The comparison is exact, so the mixed-case names must be spelled as shown
/// (`foreignObject`, not `foreignobject`).
fn is_svg_tag(element: &str) -> bool {
    // Element names from https://svgwg.org/svg2-draft/eltindex.html
    const SVG_TAGS: &[&str] = &[
        "a",
        "animate",
        "animateMotion",
        "animateTransform",
        "circle",
        "clipPath",
        "defs",
        "desc",
        "discard",
        "ellipse",
        "feBlend",
        "feColorMatrix",
        "feComponentTransfer",
        "feComposite",
        "feConvolveMatrix",
        "feDiffuseLighting",
        "feDisplacementMap",
        "feDistantLight",
        "feDropShadow",
        "feFlood",
        "feFuncA",
        "feFuncB",
        "feFuncG",
        "feFuncR",
        "feGaussianBlur",
        "feImage",
        "feMerge",
        "feMergeNode",
        "feMorphology",
        "feOffset",
        "fePointLight",
        "feSpecularLighting",
        "feSpotLight",
        "feTile",
        "feTurbulence",
        "filter",
        "foreignObject",
        "g",
        "image",
        "line",
        "linearGradient",
        "marker",
        "mask",
        "metadata",
        "mpath",
        "path",
        "pattern",
        "polygon",
        "polyline",
        "radialGradient",
        "rect",
        "script",
        "set",
        "stop",
        "style",
        "svg",
        "switch",
        "symbol",
        "text",
        "textPath",
        "title",
        "tspan",
        "use",
        "view",
    ];
    SVG_TAGS.contains(&element)
}
2258
/// Returns `true` when `element` is one of the MathML element names listed below.
///
/// The comparison is exact (case-sensitive).
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): these look like MathML presentation and content element names,
    // presumably taken from a MathML element index - confirm the intended reference.
    const MATHML_TAGS: &[&str] = &[
        "abs",
        "and",
        "annotation",
        "annotation-xml",
        "apply",
        "approx",
        "arccos",
        "arccosh",
        "arccot",
        "arccoth",
        "arccsc",
        "arccsch",
        "arcsec",
        "arcsech",
        "arcsin",
        "arcsinh",
        "arctan",
        "arctanh",
        "arg",
        "bind",
        "bvar",
        "card",
        "cartesianproduct",
        "cbytes",
        "ceiling",
        "cerror",
        "ci",
        "cn",
        "codomain",
        "complexes",
        "compose",
        "condition",
        "conjugate",
        "cos",
        "cosh",
        "cot",
        "coth",
        "cs",
        "csc",
        "csch",
        "csymbol",
        "curl",
        "declare",
        "degree",
        "determinant",
        "diff",
        "divergence",
        "divide",
        "domain",
        "domainofapplication",
        "emptyset",
        "eq",
        "equivalent",
        "eulergamma",
        "exists",
        "exp",
        "exponentiale",
        "factorial",
        "factorof",
        "false",
        "floor",
        "fn",
        "forall",
        "gcd",
        "geq",
        "grad",
        "gt",
        "ident",
        "image",
        "imaginary",
        "imaginaryi",
        "implies",
        "in",
        "infinity",
        "int",
        "integers",
        "intersect",
        "interval",
        "inverse",
        "lambda",
        "laplacian",
        "lcm",
        "leq",
        "limit",
        "list",
        "ln",
        "log",
        "logbase",
        "lowlimit",
        "lt",
        "maction",
        "maligngroup",
        "malignmark",
        "math",
        "matrix",
        "matrixrow",
        "max",
        "mean",
        "median",
        "menclose",
        "merror",
        "mfenced",
        "mfrac",
        "mglyph",
        "mi",
        "min",
        "minus",
        "mlabeledtr",
        "mlongdiv",
        "mmultiscripts",
        "mn",
        "mo",
        "mode",
        "moment",
        "momentabout",
        "mover",
        "mpadded",
        "mphantom",
        "mprescripts",
        "mroot",
        "mrow",
        "ms",
        "mscarries",
        "mscarry",
        "msgroup",
        "msline",
        "mspace",
        "msqrt",
        "msrow",
        "mstack",
        "mstyle",
        "msub",
        "msubsup",
        "msup",
        "mtable",
        "mtd",
        "mtext",
        "mtr",
        "munder",
        "munderover",
        "naturalnumbers",
        "neq",
        "none",
        "not",
        "notanumber",
        "notin",
        "notprsubset",
        "notsubset",
        "or",
        "otherwise",
        "outerproduct",
        "partialdiff",
        "pi",
        "piece",
        "piecewise",
        "plus",
        "power",
        "primes",
        "product",
        "prsubset",
        "quotient",
        "rationals",
        "real",
        "reals",
        "reln",
        "rem",
        "root",
        "scalarproduct",
        "sdev",
        "sec",
        "sech",
        "selector",
        "semantics",
        "sep",
        "set",
        "setdiff",
        "share",
        "sin",
        "sinh",
        "span",
        "subset",
        "sum",
        "tan",
        "tanh",
        "tendsto",
        "times",
        "transpose",
        "true",
        "union",
        "uplimit",
        "variance",
        "vector",
        "vectorproduct",
        "xor",
    ];
    MATHML_TAGS.contains(&element)
}
2459
2460fn is_url_relative(url: &str) -> bool {
2461 matches!(
2462 Url::parse(url),
2463 Err(url::ParseError::RelativeUrlWithoutBase)
2464 )
2465}
2466
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `action` attribute of a `form` tag, the `data` attribute of an
/// `object` tag, the `formaction` attribute of a `button` or `input` tag, the `ping`
/// attribute of an `a` tag, and the `poster` attribute of a `video` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `RewriteWithRoot`
///
/// Relative URLs are resolved inside a `root` URL, with `path` acting as the current
/// location for path-relative URLs. The variant's own documentation has a table of
/// worked examples.
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
/// ```
/// use std::borrow::Cow;
/// fn is_absolute_path(url: &str) -> bool {
///     let u = url.as_bytes();
///     // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///     // `/a/b/c` is an absolute path, and what we want to do stuff to.
///     u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
/// }
/// fn evaluate(url: &str) -> Option<Cow<str>> {
///     if is_absolute_path(url) {
///         Some(Cow::Owned(String::from("/root") + url))
///     } else {
///         Some(Cow::Borrowed(url))
///     }
/// }
/// fn main() {
///     let a = ammonia::Builder::new()
///         .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///         .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///         .to_string();
///     assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
/// }
/// ```
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// # Examples
    ///
    /// <table>
    /// <thead>
    /// <tr>
    ///     <th>root</th>
    ///     <th>path</th>
    ///     <th>url</th>
    ///     <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    Custom(Box<dyn UrlRelativeEvaluate>),
}
2630
2631impl UrlRelative {
2632 fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> {
2633 match self {
2634 UrlRelative::RewriteWithBase(ref url_base) => {
2635 url_base.join(url).ok().and_then(|x| StrTendril::from_str(x.as_str()).ok())
2636 }
2637 UrlRelative::RewriteWithRoot { ref root, ref path } => {
2638 (match url.as_bytes() {
2639 // Scheme-relative URL
2640 [b'/', b'/', ..] => root.join(url),
2641 // Path-absolute URL
2642 b"/" => root.join("."),
2643 [b'/', ..] => root.join(&url[1..]),
2644 // Path-relative URL
2645 _ => root.join(path).and_then(|r| r.join(url)),
2646 }).ok().and_then(|x| StrTendril::from_str(x.as_str()).ok())
2647 }
2648 UrlRelative::Custom(ref evaluate) => {
2649 evaluate
2650 .evaluate(&*url)
2651 .as_ref()
2652 .map(Cow::as_ref)
2653 .map(StrTendril::from_str)
2654 .and_then(Result::ok)
2655 }
2656 UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2657 UrlRelative::Deny => None,
2658 }
2659 }
2660}
2661
2662impl fmt::Debug for UrlRelative {
2663 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2664 match *self {
2665 UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2666 UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2667 UrlRelative::RewriteWithBase(ref base: &Url) => {
2668 write!(f, "UrlRelative::RewriteWithBase({})", base)
2669 }
2670 UrlRelative::RewriteWithRoot { ref root: &Url, ref path: &String } => {
2671 write!(f, "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}")
2672 }
2673 UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2674 }
2675 }
2676}
2677
/// Types that implement this trait can be used to rewrite a relative URL, for example
/// to convert it into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The argument is the relative URL found in the attribute; the returned value may
    /// borrow from it.
    fn evaluate<'a>(&self, _: &'a str) -> Option<Cow<'a, str>>;
}
/// Lets any thread-safe function or closure of the shape `Fn(&str) -> Option<Cow<str>>`
/// be used as a [`UrlRelativeEvaluate`].
impl<T> UrlRelativeEvaluate for T
where
    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync,
{
    // Forwards straight to the wrapped callable.
    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
        self(url)
    }
}
2697
2698impl fmt::Debug for dyn AttributeFilter {
2699 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2700 f.write_str(data:"AttributeFilter")
2701 }
2702}
2703
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The arguments are, in order: the element name, the attribute name, and the
    /// attribute's current value. The returned value may borrow from the current value.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}
2713
/// Lets any `'static`, thread-safe function or closure taking
/// `(element, attribute, value)` be used as an [`AttributeFilter`].
impl<T> AttributeFilter for T
where
    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
{
    // Forwards straight to the wrapped callable, keeping the argument order.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        self(element, attribute, value)
    }
}
2722
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
///
/// let input = "<!-- comments will be stripped -->This is an Ammonia example.";
/// let output = "This is an Ammonia example.";
///
/// let document = Builder::new()
///     .clean(input);
/// assert_eq!(document.to_string(), output);
/// ```
pub struct Document(RcDom);
2748
2749impl Document {
2750 /// Serializes a `Document` instance to a `String`.
2751 ///
2752 /// This method returns a [`String`] with the sanitized HTML. This is the simplest way to use
2753 /// `ammonia`.
2754 ///
2755 /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
2756 ///
2757 /// # Examples
2758 ///
2759 /// use ammonia::Builder;
2760 ///
2761 /// let input = "Some <style></style>HTML here";
2762 /// let output = "Some HTML here";
2763 ///
2764 /// let document = Builder::new()
2765 /// .clean(input);
2766 /// assert_eq!(document.to_string(), output);
2767 pub fn to_string(&self) -> String {
2768 let opts = Self::serialize_opts();
2769 let mut ret_val = Vec::new();
2770 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2771 serialize(&mut ret_val, &inner, opts)
2772 .expect("Writing to a string shouldn't fail (expect on OOM)");
2773 String::from_utf8(ret_val).expect("html5ever only supports UTF8")
2774 }
2775
2776 /// Serializes a `Document` instance to a writer.
2777 ///
2778 /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2779 ///
2780 /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2781 ///
2782 /// Note that the in-memory representation of `Document` is larger than the serialized
2783 /// `String`.
2784 ///
2785 /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2786 ///
2787 /// # Examples
2788 ///
2789 /// use ammonia::Builder;
2790 ///
2791 /// let input = "Some <style></style>HTML here";
2792 /// let expected = b"Some HTML here";
2793 ///
2794 /// let document = Builder::new()
2795 /// .clean(input);
2796 ///
2797 /// let mut sanitized = Vec::new();
2798 /// document.write_to(&mut sanitized)
2799 /// .expect("Writing to a string should not fail (except on OOM)");
2800 /// assert_eq!(sanitized, expected);
2801 pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2802 where
2803 W: io::Write,
2804 {
2805 let opts = Self::serialize_opts();
2806 let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2807 serialize(writer, &inner, opts)
2808 }
2809
2810 /// Exposes the `Document` instance as an [`rcdom::Handle`].
2811 ///
2812 /// This method returns the inner object backing the `Document` instance. This allows
2813 /// making further changes to the DOM without introducing redundant serialization and
2814 /// parsing.
2815 ///
2816 /// Note that this method should be considered unstable and sits outside of the semver
2817 /// stability guarantees. It may change, break, or go away at any time, either because
2818 /// of `html5ever` changes or `ammonia` implementation changes.
2819 ///
2820 /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2821 /// use the `RUSTFLAGS` environment variable:
2822 ///
2823 /// ```text
2824 /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2825 /// ```
2826 ///
2827 /// on Unix-like platforms, or
2828 ///
2829 /// ```text
2830 /// set RUSTFLAGS=--cfg ammonia_unstable
2831 /// cargo build
2832 /// ```
2833 ///
2834 /// on Windows.
2835 ///
2836 /// This requirement also applies to crates that transitively depend on crates that use
2837 /// this flag.
2838 ///
2839 /// # Examples
2840 ///
2841 /// use ammonia::Builder;
2842 /// use maplit::hashset;
2843 /// use html5ever::serialize::{serialize, SerializeOpts};
2844 ///
2845 /// # use std::error::Error;
2846 /// # fn do_main() -> Result<(), Box<Error>> {
2847 /// let input = "<a>one link</a> and <a>one more</a>";
2848 /// let expected = "<a>one more</a> and <a>one link</a>";
2849 ///
2850 /// let document = Builder::new()
2851 /// .link_rel(None)
2852 /// .clean(input);
2853 ///
2854 /// let mut node = document.to_dom_node();
2855 /// node.children.borrow_mut().reverse();
2856 ///
2857 /// let mut buf = Vec::new();
2858 /// serialize(&mut buf, &node, SerializeOpts::default())?;
2859 /// let output = String::from_utf8(buf)?;
2860 ///
2861 /// assert_eq!(output, expected);
2862 /// # Ok(())
2863 /// # }
2864 /// # fn main() { do_main().unwrap() }
2865 #[cfg(ammonia_unstable)]
2866 pub fn to_dom_node(&self) -> Handle {
2867 self.0.document.children.borrow()[0].clone()
2868 }
2869
2870 fn serialize_opts() -> SerializeOpts {
2871 SerializeOpts::default()
2872 }
2873}
2874
2875impl Clone for Document {
2876 fn clone(&self) -> Self {
2877 let parser: Parser = Builder::make_parser();
2878 let dom: RcDom = parser.one(&self.to_string()[..]);
2879 Document(dom)
2880 }
2881}
2882
2883impl fmt::Display for Document {
2884 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2885 write!(f, "{}", self.to_string())
2886 }
2887}
2888
2889impl fmt::Debug for Document {
2890 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2891 write!(f, "Document({})", self.to_string())
2892 }
2893}
2894
/// Converts a sanitized document into its serialized HTML.
impl From<Document> for String {
    // Consumes the document and returns the same text as `Document::to_string`.
    fn from(document: Document) -> Self {
        document.to_string()
    }
}
2900
2901#[cfg(test)]
2902mod test {
2903 use super::*;
    // Cleaning 60,000 unclosed `<b>` tags must finish without panicking; the output is
    // discarded. NOTE(review): presumably a guard against unbounded recursion - confirm.
    #[test]
    fn deeply_nested_whitelisted() {
        clean(&"<b>".repeat(60_000));
    }
    // Same as the `<b>` nesting test, but with 60,000 unclosed `<b-b>` tags; per the test
    // name these are tags the default policy does not allow. Only completion is checked.
    #[test]
    fn deeply_nested_blacklisted() {
        clean(&"<b-b>".repeat(60_000));
    }
2912 #[test]
2913 fn deeply_nested_alternating() {
2914 clean(&"<b-b>".repeat(35_000));
2915 }
2916 #[test]
2917 fn included_angles() {
2918 let fragment = "1 < 2";
2919 let result = clean(fragment);
2920 assert_eq!(result, "1 &lt; 2");
2921 }
2922 #[test]
2923 fn remove_script() {
2924 let fragment = "an <script>evil()</script> example";
2925 let result = clean(fragment);
2926 assert_eq!(result, "an example");
2927 }
2928 #[test]
2929 fn ignore_link() {
2930 let fragment = "a <a href=\"http://www.google.com\">good</a> example";
2931 let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
2932 good</a> example";
2933 let result = clean(fragment);
2934 assert_eq!(result, expected);
2935 }
2936 #[test]
2937 fn remove_unsafe_link() {
2938 let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
2939 let result = clean(fragment);
2940 assert_eq!(
2941 result,
2942 "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
2943 );
2944 }
2945 #[test]
2946 fn remove_js_link() {
2947 let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
2948 let result = clean(fragment);
2949 assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
2950 }
2951 #[test]
2952 fn tag_rebalance() {
2953 let fragment = "<b>AWESOME!";
2954 let result = clean(fragment);
2955 assert_eq!(result, "<b>AWESOME!</b>");
2956 }
2957 #[test]
2958 fn allow_url_relative() {
2959 let fragment = "<a href=test>Test</a>";
2960 let result = Builder::new()
2961 .url_relative(UrlRelative::PassThrough)
2962 .clean(fragment)
2963 .to_string();
2964 assert_eq!(
2965 result,
2966 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
2967 );
2968 }
2969 #[test]
2970 fn rewrite_url_relative() {
2971 let fragment = "<a href=test>Test</a>";
2972 let result = Builder::new()
2973 .url_relative(UrlRelative::RewriteWithBase(
2974 Url::parse("http://example.com/").unwrap(),
2975 ))
2976 .clean(fragment)
2977 .to_string();
2978 assert_eq!(
2979 result,
2980 "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
2981 );
2982 }
2983 #[test]
2984 fn rewrite_url_relative_with_invalid_url() {
2985 // Reduced from https://github.com/Bauke/ammonia-crash-test
2986 let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
2987 let result = Builder::new()
2988 .url_relative(UrlRelative::RewriteWithBase(
2989 Url::parse("http://example.com/").unwrap(),
2990 ))
2991 .clean(fragment)
2992 .to_string();
2993 assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
2994 }
2995 #[test]
2996 fn attribute_filter_nop() {
2997 let fragment = "<a href=test>Test</a>";
2998 let result = Builder::new()
2999 .attribute_filter(|elem, attr, value| {
3000 assert_eq!("a", elem);
3001 assert!(
3002 match (attr, value) {
3003 ("href", "test") => true,
3004 ("rel", "noopener noreferrer") => true,
3005 _ => false,
3006 },
3007 "{}",
3008 value.to_string()
3009 );
3010 Some(value.into())
3011 })
3012 .clean(fragment)
3013 .to_string();
3014 assert_eq!(
3015 result,
3016 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3017 );
3018 }
3019
3020 #[test]
3021 fn attribute_filter_drop() {
3022 let fragment = "Test<img alt=test src=imgtest>";
3023 let result = Builder::new()
3024 .attribute_filter(|elem, attr, value| {
3025 assert_eq!("img", elem);
3026 match (attr, value) {
3027 ("src", "imgtest") => None,
3028 ("alt", "test") => Some(value.into()),
3029 _ => panic!("unexpected"),
3030 }
3031 })
3032 .clean(fragment)
3033 .to_string();
3034 assert_eq!(result, r#"Test<img alt="test">"#);
3035 }
3036
3037 #[test]
3038 fn url_filter_absolute() {
3039 let fragment = "Test<img alt=test src=imgtest>";
3040 let result = Builder::new()
3041 .attribute_filter(|elem, attr, value| {
3042 assert_eq!("img", elem);
3043 match (attr, value) {
3044 ("src", "imgtest") => {
3045 Some(format!("https://example.com/images/{}", value).into())
3046 }
3047 ("alt", "test") => None,
3048 _ => panic!("unexpected"),
3049 }
3050 })
3051 .url_relative(UrlRelative::RewriteWithBase(
3052 Url::parse("http://wrong.invalid/").unwrap(),
3053 ))
3054 .clean(fragment)
3055 .to_string();
3056 assert_eq!(
3057 result,
3058 r#"Test<img src="https://example.com/images/imgtest">"#
3059 );
3060 }
3061
3062 #[test]
3063 fn url_filter_relative() {
3064 let fragment = "Test<img alt=test src=imgtest>";
3065 let result = Builder::new()
3066 .attribute_filter(|elem, attr, value| {
3067 assert_eq!("img", elem);
3068 match (attr, value) {
3069 ("src", "imgtest") => Some("rewrite".into()),
3070 ("alt", "test") => Some("altalt".into()),
3071 _ => panic!("unexpected"),
3072 }
3073 })
3074 .url_relative(UrlRelative::RewriteWithBase(
3075 Url::parse("https://example.com/base/#").unwrap(),
3076 ))
3077 .clean(fragment)
3078 .to_string();
3079 assert_eq!(
3080 result,
3081 r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
3082 );
3083 }
3084
3085 #[test]
3086 fn rewrite_url_relative_no_rel() {
3087 let fragment = "<a href=test>Test</a>";
3088 let result = Builder::new()
3089 .url_relative(UrlRelative::RewriteWithBase(
3090 Url::parse("http://example.com/").unwrap(),
3091 ))
3092 .link_rel(None)
3093 .clean(fragment)
3094 .to_string();
3095 assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>");
3096 }
3097 #[test]
3098 fn deny_url_relative() {
3099 let fragment = "<a href=test>Test</a>";
3100 let result = Builder::new()
3101 .url_relative(UrlRelative::Deny)
3102 .clean(fragment)
3103 .to_string();
3104 assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>");
3105 }
3106 #[test]
3107 fn replace_rel() {
3108 let fragment = "<a href=test rel=\"garbage\">Test</a>";
3109 let result = Builder::new()
3110 .url_relative(UrlRelative::PassThrough)
3111 .clean(fragment)
3112 .to_string();
3113 assert_eq!(
3114 result,
3115 "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3116 );
3117 }
3118 #[test]
3119 fn consider_rel_still_banned() {
3120 let fragment = "<a href=test rel=\"garbage\">Test</a>";
3121 let result = Builder::new()
3122 .url_relative(UrlRelative::PassThrough)
3123 .link_rel(None)
3124 .clean(fragment)
3125 .to_string();
3126 assert_eq!(result, "<a href=\"test\">Test</a>");
3127 }
3128 #[test]
3129 fn object_data() {
3130 let fragment = "<span data=\"javascript:evil()\">Test</span>\
3131 <object data=\"javascript:evil()\"></object>M";
3132 let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
3133 let result = Builder::new()
3134 .tags(hashset!["span", "object"])
3135 .generic_attributes(hashset!["data"])
3136 .clean(fragment)
3137 .to_string();
3138 assert_eq!(result, expected);
3139 }
3140 #[test]
3141 fn remove_attributes() {
3142 let fragment = "<table border=\"1\"><tr></tr></table>";
3143 let result = Builder::new().clean(fragment);
3144 assert_eq!(
3145 result.to_string(),
3146 "<table><tbody><tr></tr></tbody></table>"
3147 );
3148 }
3149 #[test]
3150 fn quotes_in_attrs() {
3151 let fragment = "<b title='\"'>contents</b>";
3152 let result = clean(fragment);
3153 assert_eq!(result, "<b title=\"&quot;\">contents</b>");
3154 }
3155 #[test]
3156 #[should_panic]
3157 fn panic_if_rel_is_allowed_and_replaced_generic() {
3158 Builder::new()
3159 .link_rel(Some("noopener noreferrer"))
3160 .generic_attributes(hashset!["rel"])
3161 .clean("something");
3162 }
3163 #[test]
3164 #[should_panic]
3165 fn panic_if_rel_is_allowed_and_replaced_a() {
3166 Builder::new()
3167 .link_rel(Some("noopener noreferrer"))
3168 .tag_attributes(hashmap![
3169 "a" => hashset!["rel"],
3170 ])
3171 .clean("something");
3172 }
3173 #[test]
3174 fn no_panic_if_rel_is_allowed_and_replaced_span() {
3175 Builder::new()
3176 .link_rel(Some("noopener noreferrer"))
3177 .tag_attributes(hashmap![
3178 "span" => hashset!["rel"],
3179 ])
3180 .clean("<span rel=\"what\">s</span>");
3181 }
3182 #[test]
3183 fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
3184 Builder::new()
3185 .link_rel(None)
3186 .generic_attributes(hashset!["rel"])
3187 .clean("<a rel=\"what\">s</a>");
3188 }
3189 #[test]
3190 fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
3191 Builder::new()
3192 .link_rel(None)
3193 .tag_attributes(hashmap![
3194 "a" => hashset!["rel"],
3195 ])
3196 .clean("<a rel=\"what\">s</a>");
3197 }
3198 #[test]
3199 fn dont_close_void_elements() {
3200 let fragment = "<br>";
3201 let result = clean(fragment);
3202 assert_eq!(result.to_string(), "<br>");
3203 }
3204 #[should_panic]
3205 #[test]
3206 fn panic_on_allowed_classes_tag_attributes() {
3207 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3208 Builder::new()
3209 .link_rel(None)
3210 .tag_attributes(hashmap![
3211 "p" => hashset!["class"],
3212 "a" => hashset!["class"],
3213 ])
3214 .allowed_classes(hashmap![
3215 "p" => hashset!["foo", "bar"],
3216 "a" => hashset!["baz"],
3217 ])
3218 .clean(fragment);
3219 }
3220 #[should_panic]
3221 #[test]
3222 fn panic_on_allowed_classes_generic_attributes() {
3223 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3224 Builder::new()
3225 .link_rel(None)
3226 .generic_attributes(hashset!["class", "href", "some-foo"])
3227 .allowed_classes(hashmap![
3228 "p" => hashset!["foo", "bar"],
3229 "a" => hashset!["baz"],
3230 ])
3231 .clean(fragment);
3232 }
3233 #[test]
3234 fn remove_non_allowed_classes() {
3235 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3236 let result = Builder::new()
3237 .link_rel(None)
3238 .allowed_classes(hashmap![
3239 "p" => hashset!["foo", "bar"],
3240 "a" => hashset!["baz"],
3241 ])
3242 .clean(fragment);
3243 assert_eq!(
3244 result.to_string(),
3245 "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3246 );
3247 }
3248 #[test]
3249 fn remove_non_allowed_classes_with_tag_class() {
3250 let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>";
3251 let result = Builder::new()
3252 .link_rel(None)
3253 .tag_attributes(hashmap![
3254 "div" => hashset!["class"],
3255 ])
3256 .allowed_classes(hashmap![
3257 "p" => hashset!["foo", "bar"],
3258 "a" => hashset!["baz"],
3259 ])
3260 .clean(fragment);
3261 assert_eq!(
3262 result.to_string(),
3263 "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>"
3264 );
3265 }
3266 #[test]
3267 fn allowed_classes_ascii_whitespace() {
3268 // According to https://infra.spec.whatwg.org/#ascii-whitespace,
3269 // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are
3270 // considered to be ASCII whitespace. Unicode whitespace characters
3271 // and VT (\x0B) aren't ASCII whitespace.
3272 let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
3273 let result = Builder::new()
3274 .allowed_classes(hashmap![
3275 "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
3276 ])
3277 .clean(fragment);
3278 assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#);
3279 }
3280 #[test]
3281 fn remove_non_allowed_attributes_with_tag_attribute_values() {
3282 let fragment = "<p data-label=\"baz\" name=\"foo\"></p>";
3283 let result = Builder::new()
3284 .tag_attribute_values(hashmap![
3285 "p" => hashmap![
3286 "data-label" => hashset!["bar"],
3287 ],
3288 ])
3289 .tag_attributes(hashmap![
3290 "p" => hashset!["name"],
3291 ])
3292 .clean(fragment);
3293 assert_eq!(result.to_string(), "<p name=\"foo\"></p>",);
3294 }
3295 #[test]
3296 fn keep_allowed_attributes_with_tag_attribute_values() {
3297 let fragment = "<p data-label=\"bar\" name=\"foo\"></p>";
3298 let result = Builder::new()
3299 .tag_attribute_values(hashmap![
3300 "p" => hashmap![
3301 "data-label" => hashset!["bar"],
3302 ],
3303 ])
3304 .tag_attributes(hashmap![
3305 "p" => hashset!["name"],
3306 ])
3307 .clean(fragment);
3308 assert_eq!(
3309 result.to_string(),
3310 "<p data-label=\"bar\" name=\"foo\"></p>",
3311 );
3312 }
3313 #[test]
3314 fn tag_attribute_values_case_insensitive() {
3315 let fragment = "<input type=\"CHECKBOX\" name=\"foo\">";
3316 let result = Builder::new()
3317 .tags(hashset!["input"])
3318 .tag_attribute_values(hashmap![
3319 "input" => hashmap![
3320 "type" => hashset!["checkbox"],
3321 ],
3322 ])
3323 .tag_attributes(hashmap![
3324 "input" => hashset!["name"],
3325 ])
3326 .clean(fragment);
3327 assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",);
3328 }
3329 #[test]
3330 fn set_tag_attribute_values() {
3331 let fragment = "<a href=\"https://example.com/\">Link</a>";
3332 let result = Builder::new()
3333 .link_rel(None)
3334 .add_tag_attributes("a", &["target"])
3335 .set_tag_attribute_value("a", "target", "_blank")
3336 .clean(fragment);
3337 assert_eq!(
3338 result.to_string(),
3339 "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
3340 );
3341 }
3342 #[test]
3343 fn update_existing_set_tag_attribute_values() {
3344 let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>";
3345 let result = Builder::new()
3346 .link_rel(None)
3347 .add_tag_attributes("a", &["target"])
3348 .set_tag_attribute_value("a", "target", "_blank")
3349 .clean(fragment);
3350 assert_eq!(
3351 result.to_string(),
3352 "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
3353 );
3354 }
3355 #[test]
3356 fn unwhitelisted_set_tag_attribute_values() {
3357 let fragment = "<span>hi</span><my-elem>";
3358 let result = Builder::new()
3359 .set_tag_attribute_value("my-elem", "my-attr", "val")
3360 .clean(fragment);
3361 assert_eq!(result.to_string(), "<span>hi</span>",);
3362 }
3363 #[test]
3364 fn remove_entity_link() {
3365 let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
3366 &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
3367 let result = clean(fragment);
3368 assert_eq!(
3369 result.to_string(),
3370 "<a rel=\"noopener noreferrer\">Click me!</a>"
3371 );
3372 }
3373 #[test]
3374 fn remove_relative_url_evaluate() {
3375 fn is_absolute_path(url: &str) -> bool {
3376 let u = url.as_bytes();
3377 // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3378 // `/a/b/c` is an absolute path, and what we want to do stuff to.
3379 u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
3380 }
3381 fn is_banned(url: &str) -> bool {
3382 let u = url.as_bytes();
3383 u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a')
3384 }
3385 fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3386 if is_absolute_path(url) {
3387 Some(Cow::Owned(String::from("/root") + url))
3388 } else if is_banned(url) {
3389 None
3390 } else {
3391 Some(Cow::Borrowed(url))
3392 }
3393 }
3394 let a = Builder::new()
3395 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3396 .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
3397 .to_string();
3398 assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
3399 }
3400 #[test]
3401 fn remove_relative_url_evaluate_b() {
3402 fn is_absolute_path(url: &str) -> bool {
3403 let u = url.as_bytes();
3404 // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
3405 // `/a/b/c` is an absolute path, and what we want to do stuff to.
3406 u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
3407 }
3408 fn is_banned(url: &str) -> bool {
3409 let u = url.as_bytes();
3410 u.get(0) == Some(&b'b') && u.get(1) == Some(&b'a')
3411 }
3412 fn evaluate(url: &str) -> Option<Cow<'_, str>> {
3413 if is_absolute_path(url) {
3414 Some(Cow::Owned(String::from("/root") + url))
3415 } else if is_banned(url) {
3416 None
3417 } else {
3418 Some(Cow::Borrowed(url))
3419 }
3420 }
3421 let a = Builder::new()
3422 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3423 .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
3424 .to_string();
3425 assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
3426 }
3427 #[test]
3428 fn remove_relative_url_evaluate_c() {
3429 // Don't run on absolute URLs.
3430 fn evaluate(_: &str) -> Option<Cow<'_, str>> {
3431 return Some(Cow::Owned(String::from("invalid")));
3432 }
3433 let a = Builder::new()
3434 .url_relative(UrlRelative::Custom(Box::new(evaluate)))
3435 .clean("<a href=\"https://www.google.com/\">google</a>")
3436 .to_string();
3437 assert_eq!(
3438 a,
3439 "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
3440 );
3441 }
3442 #[test]
3443 fn clean_children_of_bad_element() {
3444 let fragment = "<bad><evil>a</evil>b</bad>";
3445 let result = Builder::new().clean(fragment);
3446 assert_eq!(result.to_string(), "ab");
3447 }
3448 #[test]
3449 fn reader_input() {
3450 let fragment = b"an <script>evil()</script> example";
3451 let result = Builder::new().clean_from_reader(&fragment[..]);
3452 assert!(result.is_ok());
3453 assert_eq!(result.unwrap().to_string(), "an example");
3454 }
3455 #[test]
3456 fn reader_non_utf8() {
3457 let fragment = b"non-utf8 \xF0\x90\x80string";
3458 let result = Builder::new().clean_from_reader(&fragment[..]);
3459 assert!(result.is_ok());
3460 assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string");
3461 }
3462 #[test]
3463 fn display_impl() {
3464 let fragment = r#"a <a>link</a>"#;
3465 let result = Builder::new().link_rel(None).clean(fragment);
3466 assert_eq!(format!("{}", result), "a <a>link</a>");
3467 }
3468 #[test]
3469 fn debug_impl() {
3470 let fragment = r#"a <a>link</a>"#;
3471 let result = Builder::new().link_rel(None).clean(fragment);
3472 assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)");
3473 }
3474 #[cfg(ammonia_unstable)]
3475 #[test]
3476 fn to_dom_node() {
3477 let fragment = r#"a <a>link</a>"#;
3478 let result = Builder::new().link_rel(None).clean(fragment);
3479 let _node = result.to_dom_node();
3480 }
3481 #[test]
3482 fn string_from_document() {
3483 let fragment = r#"a <a>link"#;
3484 let result = String::from(Builder::new().link_rel(None).clean(fragment));
3485 assert_eq!(format!("{}", result), "a <a>link</a>");
3486 }
3487 fn require_sync<T: Sync>(_: T) {}
3488 fn require_send<T: Send>(_: T) {}
3489 #[test]
3490 fn require_sync_and_send() {
3491 require_sync(Builder::new());
3492 require_send(Builder::new());
3493 }
3494 #[test]
3495 fn id_prefixed() {
3496 let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>";
3497 let result = String::from(
3498 Builder::new()
3499 .tag_attributes(hashmap![
3500 "a" => hashset!["id"],
3501 ])
3502 .id_prefix(Some("prefix-"))
3503 .clean(fragment),
3504 );
3505 assert_eq!(
3506 result.to_string(),
3507 "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
3508 );
3509 }
3510 #[test]
3511 fn id_already_prefixed() {
3512 let fragment = "<a id=\"prefix-hello\"></a>";
3513 let result = String::from(
3514 Builder::new()
3515 .tag_attributes(hashmap![
3516 "a" => hashset!["id"],
3517 ])
3518 .id_prefix(Some("prefix-"))
3519 .clean(fragment),
3520 );
3521 assert_eq!(
3522 result.to_string(),
3523 "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
3524 );
3525 }
3526 #[test]
3527 fn clean_content_tags() {
3528 let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>";
3529 let result = String::from(
3530 Builder::new()
3531 .clean_content_tags(hashset!["script"])
3532 .clean(fragment),
3533 );
3534 assert_eq!(result.to_string(), "");
3535 }
3536 #[test]
3537 fn only_clean_content_tags() {
3538 let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3539 let result = String::from(
3540 Builder::new()
3541 .clean_content_tags(hashset!["script"])
3542 .clean(fragment),
3543 );
3544 assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3545 }
3546 #[test]
3547 fn clean_removed_default_tag() {
3548 let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
3549 let result = String::from(
3550 Builder::new()
3551 .rm_tags(hashset!["a"])
3552 .rm_tag_attributes("a", hashset!["href", "hreflang"])
3553 .clean_content_tags(hashset!["script"])
3554 .clean(fragment),
3555 );
3556 assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>");
3557 }
3558 #[test]
3559 #[should_panic]
3560 fn panic_on_clean_content_tag_attribute() {
3561 Builder::new()
3562 .rm_tags(std::iter::once("a"))
3563 .clean_content_tags(hashset!["a"])
3564 .clean("");
3565 }
3566 #[test]
3567 #[should_panic]
3568 fn panic_on_clean_content_tag() {
3569 Builder::new().clean_content_tags(hashset!["a"]).clean("");
3570 }
3571
3572 #[test]
3573 fn clean_text_test() {
3574 assert_eq!(
3575 clean_text("<this> is <a test function"),
3576 "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
3577 );
3578 }
3579
3580 #[test]
3581 fn clean_text_spaces_test() {
3582 assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
3583 }
3584
3585 #[test]
3586 fn ns_svg() {
3587 // https://github.com/cure53/DOMPurify/pull/495
3588 let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
3589 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3590 assert_eq!(result.to_string(), "test");
3591
3592 let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
3593 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3594 assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>");
3595
3596 let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
3597 let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment));
3598 assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>");
3599
3600 let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
3601 let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment));
3602 assert_eq!(
3603 result.to_string(),
3604 "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
3605 );
3606 }
3607
3608 #[test]
3609 fn ns_mathml() {
3610 // https://github.com/cure53/DOMPurify/pull/495
3611 let fragment = "<mglyph></mglyph>";
3612 let result = String::from(
3613 Builder::new()
3614 .add_tags(&["math", "mtext", "mglyph"])
3615 .clean(fragment),
3616 );
3617 assert_eq!(result.to_string(), "");
3618 let fragment = "<math><mtext><div><mglyph>";
3619 let result = String::from(
3620 Builder::new()
3621 .add_tags(&["math", "mtext", "mglyph"])
3622 .clean(fragment),
3623 );
3624 assert_eq!(
3625 result.to_string(),
3626 "<math><mtext><div></div></mtext></math>"
3627 );
3628 let fragment = "<math><mtext><mglyph>";
3629 let result = String::from(
3630 Builder::new()
3631 .add_tags(&["math", "mtext", "mglyph"])
3632 .clean(fragment),
3633 );
3634 assert_eq!(
3635 result.to_string(),
3636 "<math><mtext><mglyph></mglyph></mtext></math>"
3637 );
3638 }
3639
3640 #[test]
3641 fn generic_attribute_prefixes() {
3642 let prefix_data = ["data-"];
3643 let prefix_code = ["code-"];
3644 let mut b = Builder::new();
3645 let mut hs: HashSet<&'_ str> = HashSet::new();
3646 hs.insert("data-");
3647 assert_eq!(b.generic_attribute_prefixes.is_none(), true);
3648 b.generic_attribute_prefixes(hs);
3649 assert_eq!(b.generic_attribute_prefixes.is_some(), true);
3650 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3651 b.add_generic_attribute_prefixes(&prefix_data);
3652 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3653 b.add_generic_attribute_prefixes(&prefix_code);
3654 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2);
3655 b.rm_generic_attribute_prefixes(&prefix_code);
3656 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3657 b.rm_generic_attribute_prefixes(&prefix_code);
3658 assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1);
3659 b.rm_generic_attribute_prefixes(&prefix_data);
3660 assert_eq!(b.generic_attribute_prefixes.is_none(), true);
3661 }
3662
3663 #[test]
3664 fn generic_attribute_prefixes_clean() {
3665 let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;
3666 let result_cleaned = String::from(
3667 Builder::new()
3668 .add_tag_attributes("a", &["data-1"])
3669 .clean(fragment),
3670 );
3671 assert_eq!(
3672 result_cleaned,
3673 r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3674 );
3675 let result_allowed = String::from(
3676 Builder::new()
3677 .add_tag_attributes("a", &["data-1"])
3678 .add_generic_attribute_prefixes(&["data-"])
3679 .clean(fragment),
3680 );
3681 assert_eq!(
3682 result_allowed,
3683 r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3684 );
3685 let result_allowed = String::from(
3686 Builder::new()
3687 .add_tag_attributes("a", &["data-1", "code-1"])
3688 .add_generic_attribute_prefixes(&["data-", "code-"])
3689 .clean(fragment),
3690 );
3691 assert_eq!(
3692 result_allowed,
3693 r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
3694 );
3695 }
3696 #[test]
3697 fn lesser_than_isnt_html() {
3698 let fragment = "1 < 2";
3699 assert!(!is_html(fragment));
3700 }
3701 #[test]
3702 fn dense_lesser_than_isnt_html() {
3703 let fragment = "1<2";
3704 assert!(!is_html(fragment));
3705 }
3706 #[test]
3707 fn what_about_number_elements() {
3708 let fragment = "foo<2>bar";
3709 assert!(!is_html(fragment));
3710 }
3711 #[test]
3712 fn turbofish_is_html_sadly() {
3713 let fragment = "Vec::<u8>::new()";
3714 assert!(is_html(fragment));
3715 }
3716 #[test]
3717 fn stop_grinning() {
3718 let fragment = "did you really believe me? <g>";
3719 assert!(is_html(fragment));
3720 }
3721 #[test]
3722 fn dont_be_bold() {
3723 let fragment = "<b>";
3724 assert!(is_html(fragment));
3725 }
3726
3727 #[test]
3728 fn rewrite_with_root() {
3729 let tests = [
3730 (
3731 "https://github.com/rust-ammonia/ammonia/blob/master/",
3732 "README.md",
3733 "",
3734 "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
3735 ),
3736 (
3737 "https://github.com/rust-ammonia/ammonia/blob/master/",
3738 "README.md",
3739 "/",
3740 "https://github.com/rust-ammonia/ammonia/blob/master/",
3741 ),
3742 (
3743 "https://github.com/rust-ammonia/ammonia/blob/master/",
3744 "README.md",
3745 "/CONTRIBUTING.md",
3746 "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3747 ),
3748 (
3749 "https://github.com/rust-ammonia/ammonia/blob/master",
3750 "README.md",
3751 "",
3752 "https://github.com/rust-ammonia/ammonia/blob/README.md",
3753 ),
3754 (
3755 "https://github.com/rust-ammonia/ammonia/blob/master",
3756 "README.md",
3757 "/",
3758 "https://github.com/rust-ammonia/ammonia/blob/",
3759 ),
3760 (
3761 "https://github.com/rust-ammonia/ammonia/blob/master",
3762 "README.md",
3763 "/CONTRIBUTING.md",
3764 "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
3765 ),
3766 (
3767 "https://github.com/rust-ammonia/ammonia/blob/master/",
3768 "",
3769 "",
3770 "https://github.com/rust-ammonia/ammonia/blob/master/",
3771 ),
3772 (
3773 "https://github.com/rust-ammonia/ammonia/blob/master/",
3774 "",
3775 "/",
3776 "https://github.com/rust-ammonia/ammonia/blob/master/",
3777 ),
3778 (
3779 "https://github.com/rust-ammonia/ammonia/blob/master/",
3780 "",
3781 "/CONTRIBUTING.md",
3782 "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3783 ),
3784 (
3785 "https://github.com/",
3786 "rust-ammonia/ammonia/blob/master/README.md",
3787 "",
3788 "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
3789 ),
3790 (
3791 "https://github.com/",
3792 "rust-ammonia/ammonia/blob/master/README.md",
3793 "/",
3794 "https://github.com/",
3795 ),
3796 (
3797 "https://github.com/",
3798 "rust-ammonia/ammonia/blob/master/README.md",
3799 "CONTRIBUTING.md",
3800 "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
3801 ),
3802 (
3803 "https://github.com/",
3804 "rust-ammonia/ammonia/blob/master/README.md",
3805 "/CONTRIBUTING.md",
3806 "https://github.com/CONTRIBUTING.md",
3807 ),
3808 ];
3809 for (root, path, url, result) in tests {
3810 let h = format!(r#"<a href="{url}">test</a>"#);
3811 let r = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
3812 let a = Builder::new()
3813 .url_relative(UrlRelative::RewriteWithRoot { root: Url::parse(root).unwrap(), path: path.to_string() })
3814 .clean(&h)
3815 .to_string();
3816 if r != a {
3817 println!("failed to check ({root}, {path}, {url}, {result})\n{r} != {a}", r = r);
3818 assert_eq!(r, a);
3819 }
3820 }
3821 }
3822}
3823