1 | // Copyright (C) Michael Howell and others |
2 | // this library is released under the same terms as Rust itself. |
3 | |
4 | #![deny (unsafe_code)] |
5 | #![deny (missing_docs)] |
6 | |
7 | //! Ammonia is a whitelist-based HTML sanitization library. It is designed to |
8 | //! prevent cross-site scripting, layout breaking, and clickjacking caused |
9 | //! by untrusted user-provided HTML being mixed into a larger web page. |
10 | //! |
11 | //! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do, |
12 | //! so it is extremely resilient to syntactic obfuscation. |
13 | //! |
14 | //! Ammonia parses its input exactly according to the HTML5 specification; |
15 | //! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©. |
16 | //! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark]. |
17 | //! |
18 | //! # Examples |
19 | //! |
20 | //! ``` |
21 | //! let result = ammonia::clean( |
22 | //! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>" |
23 | //! ); |
//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25 | //! ``` |
26 | //! |
27 | //! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo" |
28 | //! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser" |
29 | |
30 | |
31 | #[cfg (ammonia_unstable)] |
32 | pub mod rcdom; |
33 | |
34 | #[cfg (not(ammonia_unstable))] |
35 | mod rcdom; |
36 | |
37 | use html5ever::interface::Attribute; |
38 | use html5ever::serialize::{serialize, SerializeOpts}; |
39 | use html5ever::tree_builder::{NodeOrText, TreeSink}; |
40 | use html5ever::{driver as html, local_name, namespace_url, ns, QualName}; |
41 | use maplit::{hashmap, hashset}; |
42 | use once_cell::sync::Lazy; |
43 | use rcdom::{Handle, NodeData, RcDom, SerializableHandle}; |
44 | use std::borrow::{Borrow, Cow}; |
45 | use std::cmp::max; |
46 | use std::collections::{HashMap, HashSet}; |
47 | use std::fmt; |
48 | use std::io; |
49 | use std::iter::IntoIterator as IntoIter; |
50 | use std::mem; |
51 | use std::rc::Rc; |
52 | use std::str::FromStr; |
53 | use tendril::stream::TendrilSink; |
54 | use tendril::StrTendril; |
55 | use tendril::{format_tendril, ByteTendril}; |
56 | pub use url::Url; |
57 | |
58 | use html5ever::buffer_queue::BufferQueue; |
59 | use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer}; |
60 | pub use url; |
61 | |
/// Shared sanitizer holding the default [`Builder`] configuration.
///
/// It is constructed lazily on first access and reused by the free function
/// [`clean`], so the default tables are only built once.
static AMMONIA: Lazy<Builder<'static>> = Lazy::new(Builder::default);
63 | |
64 | /// Clean HTML with a conservative set of defaults. |
65 | /// |
66 | /// * [tags](struct.Builder.html#defaults) |
67 | /// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1) |
68 | /// * [attributes on specific tags](struct.Builder.html#defaults-2) |
69 | /// * [attributes on all tags](struct.Builder.html#defaults-6) |
70 | /// * [url schemes](struct.Builder.html#defaults-7) |
71 | /// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8) |
72 | /// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9) |
73 | /// * all `class=""` settings are blocked by default |
74 | /// * comments are stripped by default |
75 | /// * no generic attribute prefixes are turned on by default |
76 | /// * no specific tag-attribute-value settings are configured by default |
77 | /// |
78 | /// [opener]: https://mathiasbynens.github.io/rel-noopener/ |
79 | /// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer |
80 | /// |
81 | /// # Examples |
82 | /// |
83 | /// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS") |
84 | pub fn clean(src: &str) -> String { |
85 | AMMONIA.clean(src).to_string() |
86 | } |
87 | |
/// Turn an arbitrary string into unformatted HTML.
///
/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
/// It is as strict as possible, encoding every character that has special meaning to the
/// HTML parser.
///
/// # Warnings
///
/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
/// you need a JavaScript or CSS escaper to do that.
///
/// ```
/// // DO NOT DO THIS
/// # use ammonia::clean_text;
/// let untrusted = "Robert\"); abuse();//";
/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
/// ```
///
/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
/// If you want to build an editor that works the way most folks expect them to, you should put a
/// newline at the beginning of the tag, like this:
///
/// ```
/// # use ammonia::{Builder, clean_text};
/// let untrusted = "\n\nhi!";
/// let mut b = Builder::new();
/// b.add_tags(&["textarea"]);
/// // This is the bad version
/// // The user put two newlines at the beginning, but the first one was removed
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
/// // This is a good version
/// // The user put two newlines at the beginning, and we add a third one,
/// // so the result still has two
/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
/// // This version is also often considered good
/// // For many applications, leading and trailing whitespace is probably unwanted
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
/// assert_eq!("<textarea>hi!</textarea>", sanitized);
/// ```
///
/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // The output is at least as long as the input; 4 is the shortest escape (`&lt;`).
    let mut ret_val = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        let replacement = match c {
            // this character, when confronted, will start a tag
            '<' => "&lt;",
            // in an unquoted attribute, will end the attribute value
            '>' => "&gt;",
            // in an attribute surrounded by double quotes, this character will end the attribute value
            '"' => "&quot;",
            // in an attribute surrounded by single quotes, this character will end the attribute value
            '\'' => "&apos;",
            // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes
            '`' => "&grave;",
            // in an unquoted attribute, this character will end the attribute
            '/' => "&#47;",
            // starts an entity reference
            '&' => "&amp;",
            // if at the beginning of an unquoted attribute, will get ignored
            '=' => "&#61;",
            // will end an unquoted attribute
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // a spec-compliant browser will perform this replacement anyway, but the middleware might not
            '\0' => "&#65533;",
            // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM
            _ => {
                ret_val.push(c);
                continue;
            }
        };
        ret_val.push_str(replacement);
    }
    ret_val
}
167 | |
168 | /// Determine if a given string contains HTML |
169 | /// |
170 | /// This function is parses the full string into HTML and checks if the input contained any |
171 | /// HTML syntax. |
172 | /// |
173 | /// # Note |
174 | /// This function will return positively for strings that contain invalid HTML syntax like |
175 | /// `<g>` and even `Vec::<u8>::new()`. |
176 | pub fn is_html(input: &str) -> bool { |
177 | let santok: SanitizationTokenizer = SanitizationTokenizer::new(); |
178 | let mut chunk: Tendril = ByteTendril::new(); |
179 | chunk.push_slice(input.as_bytes()); |
180 | let mut input: BufferQueue = BufferQueue::new(); |
181 | input.push_back(buf:chunk.try_reinterpret().unwrap()); |
182 | |
183 | let mut tok: Tokenizer = Tokenizer::new(sink:santok, opts:Default::default()); |
184 | let _ = tok.feed(&mut input); |
185 | tok.end(); |
186 | tok.sink.was_sanitized |
187 | } |
188 | |
/// Token sink used by `is_html` to record whether the tokenizer produced
/// anything other than plain character data.
#[derive(Copy, Clone)]
struct SanitizationTokenizer {
    /// Becomes `true` once a token other than character data, end-of-file, or a
    /// parse error has been received; it is never reset.
    was_sanitized: bool,
}
193 | |
194 | impl SanitizationTokenizer { |
195 | pub fn new() -> SanitizationTokenizer { |
196 | SanitizationTokenizer { |
197 | was_sanitized: false, |
198 | } |
199 | } |
200 | } |
201 | |
202 | impl TokenSink for SanitizationTokenizer { |
203 | type Handle = (); |
204 | fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { |
205 | match token { |
206 | Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {} |
207 | _ => { |
208 | self.was_sanitized = true; |
209 | } |
210 | } |
211 | TokenSinkResult::Continue |
212 | } |
213 | fn end(&mut self) {} |
214 | } |
215 | |
216 | /// An HTML sanitizer. |
217 | /// |
218 | /// Given a fragment of HTML, Ammonia will parse it according to the HTML5 |
219 | /// parsing algorithm and sanitize any disallowed tags or attributes. This |
220 | /// algorithm also takes care of things like unclosed and (some) misnested |
221 | /// tags. |
222 | /// |
223 | /// # Examples |
224 | /// |
/// ```
/// use ammonia::{Builder, UrlRelative};
///
/// let a = Builder::default()
///     .link_rel(None)
///     .url_relative(UrlRelative::PassThrough)
///     .clean("<a href=/>test")
///     .to_string();
/// assert_eq!(
///     a,
///     "<a href=\"/\">test</a>");
/// ```
235 | /// |
236 | /// # Panics |
237 | /// |
238 | /// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is |
239 | /// configured with any of these (contradictory) settings: |
240 | /// |
241 | /// * The `rel` attribute is added to [`generic_attributes`] or the |
242 | /// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`. |
243 | /// |
244 | /// For example, this is going to panic, since [`link_rel`] is set to |
245 | /// `Some("noopener noreferrer")` by default, |
246 | /// and it makes no sense to simultaneously say that the user is allowed to |
247 | /// set their own `rel` attribute while saying that every link shall be set to |
248 | /// a particular value: |
249 | /// |
250 | /// ```should_panic |
251 | /// use ammonia::Builder; |
252 | /// use maplit::hashset; |
253 | /// |
254 | /// # fn main() { |
255 | /// Builder::default() |
256 | /// .generic_attributes(hashset!["rel" ]) |
257 | /// .clean("" ); |
258 | /// # } |
259 | /// ``` |
260 | /// |
261 | /// This, however, is perfectly valid: |
262 | /// |
263 | /// ``` |
264 | /// use ammonia::Builder; |
265 | /// use maplit::hashset; |
266 | /// |
267 | /// # fn main() { |
268 | /// Builder::default() |
269 | /// .generic_attributes(hashset!["rel" ]) |
270 | /// .link_rel(None) |
271 | /// .clean("" ); |
272 | /// # } |
273 | /// ``` |
274 | /// |
275 | /// * The `class` attribute is in [`allowed_classes`] and is in the |
276 | /// corresponding [`tag_attributes`] or in [`generic_attributes`]. |
277 | /// |
278 | /// This is done both to line up with the treatment of `rel`, |
279 | /// and to prevent people from accidentally allowing arbitrary |
280 | /// classes on a particular element. |
281 | /// |
282 | /// This will panic: |
283 | /// |
284 | /// ```should_panic |
285 | /// use ammonia::Builder; |
286 | /// use maplit::{hashmap, hashset}; |
287 | /// |
288 | /// # fn main() { |
289 | /// Builder::default() |
290 | /// .generic_attributes(hashset!["class" ]) |
291 | /// .allowed_classes(hashmap!["span" => hashset!["hidden" ]]) |
292 | /// .clean("" ); |
293 | /// # } |
294 | /// ``` |
295 | /// |
296 | /// This, however, is perfectly valid: |
297 | /// |
298 | /// ``` |
299 | /// use ammonia::Builder; |
300 | /// use maplit::{hashmap, hashset}; |
301 | /// |
302 | /// # fn main() { |
303 | /// Builder::default() |
304 | /// .allowed_classes(hashmap!["span" => hashset!["hidden" ]]) |
305 | /// .clean("" ); |
306 | /// # } |
307 | /// ``` |
308 | /// |
309 | /// * A tag is in either [`tags`] or [`tag_attributes`] while also |
310 | /// being in [`clean_content_tags`]. |
311 | /// |
312 | /// Both [`tags`] and [`tag_attributes`] are whitelists but |
313 | /// [`clean_content_tags`] is a blacklist, so it doesn't make sense |
314 | /// to have the same tag in both. |
315 | /// |
316 | /// For example, this will panic, since the `aside` tag is in |
317 | /// [`tags`] by default: |
318 | /// |
319 | /// ```should_panic |
320 | /// use ammonia::Builder; |
321 | /// use maplit::hashset; |
322 | /// |
323 | /// # fn main() { |
324 | /// Builder::default() |
325 | /// .clean_content_tags(hashset!["aside" ]) |
326 | /// .clean("" ); |
327 | /// # } |
328 | /// ``` |
329 | /// |
330 | /// This, however, is valid: |
331 | /// |
332 | /// ``` |
333 | /// use ammonia::Builder; |
334 | /// use maplit::hashset; |
335 | /// |
336 | /// # fn main() { |
337 | /// Builder::default() |
338 | /// .rm_tags(&["aside" ]) |
339 | /// .clean_content_tags(hashset!["aside" ]) |
340 | /// .clean("" ); |
341 | /// # } |
342 | /// ``` |
343 | /// |
344 | /// [`clean`]: #method.clean |
345 | /// [`clean_from_reader`]: #method.clean_from_reader |
346 | /// [`generic_attributes`]: #method.generic_attributes |
347 | /// [`tag_attributes`]: #method.tag_attributes |
348 | /// [`generic_attributes`]: #method.generic_attributes |
349 | /// [`link_rel`]: #method.link_rel |
350 | /// [`allowed_classes`]: #method.allowed_classes |
351 | /// [`id_prefix`]: #method.id_prefix |
352 | /// [`tags`]: #method.tags |
353 | /// [`clean_content_tags`]: #method.clean_content_tags |
#[derive(Debug)]
pub struct Builder<'a> {
    /// Tags that are allowed in the output.
    tags: HashSet<&'a str>,
    /// Tags whose contents are removed from the output entirely.
    clean_content_tags: HashSet<&'a str>,
    /// Allowed attribute names, keyed by tag name.
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
    /// Allowed attribute values, keyed by tag name and then attribute name.
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
    /// Attribute values to be set on specific tags, keyed by tag name and then
    /// attribute name.
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
    /// Attribute names allowed on all tags.
    generic_attributes: HashSet<&'a str>,
    /// URL schemes; presumably the whitelist applied to URL-valued attributes
    /// (the consuming code is not visible here — confirm).
    url_schemes: HashSet<&'a str>,
    /// Policy for relative URLs; the default is `UrlRelative::PassThrough`.
    url_relative: UrlRelative,
    /// Optional user-supplied attribute filter; `None` by default.
    /// NOTE(review): semantics are defined by `AttributeFilter`, which is outside this view.
    attribute_filter: Option<Box<dyn AttributeFilter>>,
    /// Value that every link's `rel` is set to, or `None` to leave `rel` alone;
    /// the default is `Some("noopener noreferrer")`.
    link_rel: Option<&'a str>,
    /// Allowed `class` values, presumably keyed by tag name — confirm against the
    /// `allowed_classes` setter. Empty by default.
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
    /// Whether comments are stripped; `true` by default.
    strip_comments: bool,
    /// Optional prefix related to `id` attributes; `None` by default.
    /// NOTE(review): exact behaviour is defined outside this view.
    id_prefix: Option<&'a str>,
    /// Optional set of generic attribute prefixes; `None` (off) by default.
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
}
371 | |
372 | impl<'a> Default for Builder<'a> { |
373 | fn default() -> Self { |
374 | #[rustfmt::skip] |
375 | let tags = hashset![ |
376 | "a" , "abbr" , "acronym" , "area" , "article" , "aside" , "b" , "bdi" , |
377 | "bdo" , "blockquote" , "br" , "caption" , "center" , "cite" , "code" , |
378 | "col" , "colgroup" , "data" , "dd" , "del" , "details" , "dfn" , "div" , |
379 | "dl" , "dt" , "em" , "figcaption" , "figure" , "footer" , "h1" , "h2" , |
380 | "h3" , "h4" , "h5" , "h6" , "header" , "hgroup" , "hr" , "i" , "img" , |
381 | "ins" , "kbd" , "kbd" , "li" , "map" , "mark" , "nav" , "ol" , "p" , "pre" , |
382 | "q" , "rp" , "rt" , "rtc" , "ruby" , "s" , "samp" , "small" , "span" , |
383 | "strike" , "strong" , "sub" , "summary" , "sup" , "table" , "tbody" , |
384 | "td" , "th" , "thead" , "time" , "tr" , "tt" , "u" , "ul" , "var" , "wbr" |
385 | ]; |
386 | let clean_content_tags = hashset!["script" , "style" ]; |
387 | let generic_attributes = hashset!["lang" , "title" ]; |
388 | let tag_attributes = hashmap![ |
389 | "a" => hashset![ |
390 | "href" , "hreflang" |
391 | ], |
392 | "bdo" => hashset![ |
393 | "dir" |
394 | ], |
395 | "blockquote" => hashset![ |
396 | "cite" |
397 | ], |
398 | "col" => hashset![ |
399 | "align" , "char" , "charoff" , "span" |
400 | ], |
401 | "colgroup" => hashset![ |
402 | "align" , "char" , "charoff" , "span" |
403 | ], |
404 | "del" => hashset![ |
405 | "cite" , "datetime" |
406 | ], |
407 | "hr" => hashset![ |
408 | "align" , "size" , "width" |
409 | ], |
410 | "img" => hashset![ |
411 | "align" , "alt" , "height" , "src" , "width" |
412 | ], |
413 | "ins" => hashset![ |
414 | "cite" , "datetime" |
415 | ], |
416 | "ol" => hashset![ |
417 | "start" |
418 | ], |
419 | "q" => hashset![ |
420 | "cite" |
421 | ], |
422 | "table" => hashset![ |
423 | "align" , "char" , "charoff" , "summary" |
424 | ], |
425 | "tbody" => hashset![ |
426 | "align" , "char" , "charoff" |
427 | ], |
428 | "td" => hashset![ |
429 | "align" , "char" , "charoff" , "colspan" , "headers" , "rowspan" |
430 | ], |
431 | "tfoot" => hashset![ |
432 | "align" , "char" , "charoff" |
433 | ], |
434 | "th" => hashset![ |
435 | "align" , "char" , "charoff" , "colspan" , "headers" , "rowspan" , "scope" |
436 | ], |
437 | "thead" => hashset![ |
438 | "align" , "char" , "charoff" |
439 | ], |
440 | "tr" => hashset![ |
441 | "align" , "char" , "charoff" |
442 | ], |
443 | ]; |
444 | let tag_attribute_values = hashmap![]; |
445 | let set_tag_attribute_values = hashmap![]; |
446 | let url_schemes = hashset![ |
447 | "bitcoin" , |
448 | "ftp" , |
449 | "ftps" , |
450 | "geo" , |
451 | "http" , |
452 | "https" , |
453 | "im" , |
454 | "irc" , |
455 | "ircs" , |
456 | "magnet" , |
457 | "mailto" , |
458 | "mms" , |
459 | "mx" , |
460 | "news" , |
461 | "nntp" , |
462 | "openpgp4fpr" , |
463 | "sip" , |
464 | "sms" , |
465 | "smsto" , |
466 | "ssh" , |
467 | "tel" , |
468 | "url" , |
469 | "webcal" , |
470 | "wtai" , |
471 | "xmpp" |
472 | ]; |
473 | let allowed_classes = hashmap![]; |
474 | |
475 | Builder { |
476 | tags, |
477 | clean_content_tags, |
478 | tag_attributes, |
479 | tag_attribute_values, |
480 | set_tag_attribute_values, |
481 | generic_attributes, |
482 | url_schemes, |
483 | url_relative: UrlRelative::PassThrough, |
484 | attribute_filter: None, |
485 | link_rel: Some("noopener noreferrer" ), |
486 | allowed_classes, |
487 | strip_comments: true, |
488 | id_prefix: None, |
489 | generic_attribute_prefixes: None, |
490 | } |
491 | } |
492 | } |
493 | |
494 | impl<'a> Builder<'a> { |
/// Sets the tags that are allowed, replacing the current set.
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
/// use maplit::hashset;
///
/// # fn main() {
/// let tags = hashset!["my-tag"];
/// let a = Builder::new()
///     .tags(tags)
///     .clean("<my-tag>")
///     .to_string();
/// assert_eq!(a, "<my-tag></my-tag>");
/// # }
/// ```
///
/// # Defaults
///
/// ```notest
/// a, abbr, acronym, area, article, aside, b, bdi,
/// bdo, blockquote, br, caption, center, cite, code,
/// col, colgroup, data, dd, del, details, dfn, div,
/// dl, dt, em, figcaption, figure, footer, h1, h2,
/// h3, h4, h5, h6, header, hgroup, hr, i, img,
/// ins, kbd, li, map, mark, nav, ol, p, pre,
/// q, rp, rt, rtc, ruby, s, samp, small, span,
/// strike, strong, sub, summary, sup, table, tbody,
/// td, th, thead, time, tr, tt, u, ul, var, wbr
/// ```
pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.tags = value;
    self
}
528 | |
529 | /// Add additonal whitelisted tags without overwriting old ones. |
530 | /// |
531 | /// Does nothing if the tag is already there. |
532 | /// |
533 | /// # Examples |
534 | /// |
535 | /// let a = ammonia::Builder::default() |
536 | /// .add_tags(&["my-tag"]) |
537 | /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
538 | /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a); |
539 | pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
540 | &mut self, |
541 | it: I, |
542 | ) -> &mut Self { |
543 | self.tags.extend(it.into_iter().map(Borrow::borrow)); |
544 | self |
545 | } |
546 | |
547 | /// Remove already-whitelisted tags. |
548 | /// |
549 | /// Does nothing if the tags is already gone. |
550 | /// |
551 | /// # Examples |
552 | /// |
553 | /// let a = ammonia::Builder::default() |
554 | /// .rm_tags(&["span"]) |
555 | /// .clean("<span></span>").to_string(); |
556 | /// assert_eq!("", a); |
557 | pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
558 | &mut self, |
559 | it: I, |
560 | ) -> &mut Self { |
561 | for i in it { |
562 | self.tags.remove(i.borrow()); |
563 | } |
564 | self |
565 | } |
566 | |
/// Returns a copy of the set of whitelisted tags.
///
/// # Examples
///
/// ```
/// use maplit::hashset;
///
/// let tags = hashset!["my-tag-1", "my-tag-2"];
///
/// let mut b = ammonia::Builder::default();
/// b.tags(Clone::clone(&tags));
/// assert_eq!(tags, b.clone_tags());
/// ```
pub fn clone_tags(&self) -> HashSet<&'a str> {
    self.tags.clone()
}
581 | |
/// Sets the tags whose contents will be completely removed from the output,
/// replacing the current set.
///
/// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
/// a panic.
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
/// use maplit::hashset;
///
/// # fn main() {
/// let tag_blacklist = hashset!["script", "style"];
/// let a = Builder::new()
///     .clean_content_tags(tag_blacklist)
///     .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
///     .to_string();
/// assert_eq!(a, "");
/// # }
/// ```
///
/// # Defaults
///
/// ```notest
/// script, style
/// ```
pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.clean_content_tags = value;
    self
}
610 | |
611 | /// Add additonal blacklisted clean-content tags without overwriting old ones. |
612 | /// |
613 | /// Does nothing if the tag is already there. |
614 | /// |
615 | /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause |
616 | /// a panic. |
617 | /// |
618 | /// # Examples |
619 | /// |
620 | /// let a = ammonia::Builder::default() |
621 | /// .add_clean_content_tags(&["my-tag"]) |
622 | /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string(); |
623 | /// assert_eq!("<span>mess</span>", a); |
624 | pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
625 | &mut self, |
626 | it: I, |
627 | ) -> &mut Self { |
628 | self.clean_content_tags |
629 | .extend(it.into_iter().map(Borrow::borrow)); |
630 | self |
631 | } |
632 | |
633 | /// Remove already-blacklisted clean-content tags. |
634 | /// |
635 | /// Does nothing if the tags aren't blacklisted. |
636 | /// |
637 | /// # Examples |
638 | /// use ammonia::Builder; |
639 | /// use maplit::hashset; |
640 | /// |
641 | /// # fn main() { |
642 | /// let tag_blacklist = hashset!["script"]; |
643 | /// let a = ammonia::Builder::default() |
644 | /// .clean_content_tags(tag_blacklist) |
645 | /// .rm_clean_content_tags(&["script"]) |
646 | /// .clean("<script>XSS</script>").to_string(); |
647 | /// assert_eq!("XSS", a); |
648 | /// # } |
649 | pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
650 | &mut self, |
651 | it: I, |
652 | ) -> &mut Self { |
653 | for i in it { |
654 | self.clean_content_tags.remove(i.borrow()); |
655 | } |
656 | self |
657 | } |
658 | |
/// Returns a copy of the set of blacklisted clean-content tags.
///
/// # Examples
///
/// ```
/// # use maplit::hashset;
///
/// let tags = hashset!["my-tag-1", "my-tag-2"];
///
/// let mut b = ammonia::Builder::default();
/// b.clean_content_tags(Clone::clone(&tags));
/// assert_eq!(tags, b.clone_clean_content_tags());
/// ```
pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
    self.clean_content_tags.clone()
}
672 | |
/// Sets the HTML attributes that are allowed on specific tags, replacing the
/// current map.
///
/// The value is structured as a map from tag names to a set of attribute names.
///
/// If a tag is not itself whitelisted, adding entries to this map will do nothing.
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
/// use maplit::{hashmap, hashset};
///
/// # fn main() {
/// let tags = hashset!["my-tag"];
/// let tag_attributes = hashmap![
///     "my-tag" => hashset!["val"]
/// ];
/// let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
///     .clean("<my-tag val=1>")
///     .to_string();
/// assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
/// # }
/// ```
///
/// # Defaults
///
/// ```notest
/// a =>
///     href, hreflang
/// bdo =>
///     dir
/// blockquote =>
///     cite
/// col =>
///     align, char, charoff, span
/// colgroup =>
///     align, char, charoff, span
/// del =>
///     cite, datetime
/// hr =>
///     align, size, width
/// img =>
///     align, alt, height, src, width
/// ins =>
///     cite, datetime
/// ol =>
///     start
/// q =>
///     cite
/// table =>
///     align, char, charoff, summary
/// tbody =>
///     align, char, charoff
/// td =>
///     align, char, charoff, colspan, headers, rowspan
/// tfoot =>
///     align, char, charoff
/// th =>
///     align, char, charoff, colspan, headers, rowspan, scope
/// thead =>
///     align, char, charoff
/// tr =>
///     align, char, charoff
/// ```
pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
    self.tag_attributes = value;
    self
}
739 | |
740 | /// Add additonal whitelisted tag-specific attributes without overwriting old ones. |
741 | /// |
742 | /// # Examples |
743 | /// |
744 | /// let a = ammonia::Builder::default() |
745 | /// .add_tags(&["my-tag"]) |
746 | /// .add_tag_attributes("my-tag", &["my-attr"]) |
747 | /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
748 | /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
749 | pub fn add_tag_attributes< |
750 | T: 'a + ?Sized + Borrow<str>, |
751 | U: 'a + ?Sized + Borrow<str>, |
752 | I: IntoIter<Item = &'a T>, |
753 | >( |
754 | &mut self, |
755 | tag: &'a U, |
756 | it: I, |
757 | ) -> &mut Self { |
758 | self.tag_attributes |
759 | .entry(tag.borrow()) |
760 | .or_insert_with(HashSet::new) |
761 | .extend(it.into_iter().map(Borrow::borrow)); |
762 | self |
763 | } |
764 | |
765 | /// Remove already-whitelisted tag-specific attributes. |
766 | /// |
767 | /// Does nothing if the attribute is already gone. |
768 | /// |
769 | /// # Examples |
770 | /// |
771 | /// let a = ammonia::Builder::default() |
772 | /// .rm_tag_attributes("a", &["href"]) |
773 | /// .clean("<a href=\"/\"></a>").to_string(); |
774 | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
775 | pub fn rm_tag_attributes< |
776 | 'b, |
777 | 'c, |
778 | T: 'b + ?Sized + Borrow<str>, |
779 | U: 'c + ?Sized + Borrow<str>, |
780 | I: IntoIter<Item = &'b T>, |
781 | >( |
782 | &mut self, |
783 | tag: &'c U, |
784 | it: I, |
785 | ) -> &mut Self { |
786 | if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) { |
787 | for i in it { |
788 | tag.remove(i.borrow()); |
789 | } |
790 | } |
791 | self |
792 | } |
793 | |
/// Returns a copy of the set of whitelisted tag-specific attributes.
///
/// # Examples
///
/// ```
/// use maplit::{hashmap, hashset};
///
/// let tag_attributes = hashmap![
///     "my-tag" => hashset!["my-attr-1", "my-attr-2"]
/// ];
///
/// let mut b = ammonia::Builder::default();
/// b.tag_attributes(Clone::clone(&tag_attributes));
/// assert_eq!(tag_attributes, b.clone_tag_attributes());
/// ```
pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    self.tag_attributes.clone()
}
809 | |
/// Sets the values of HTML attributes that are allowed on specific tags,
/// replacing the current map.
///
/// The value is structured as a map from tag names to a map from attribute names to a set of
/// attribute values.
///
/// If a tag is not itself whitelisted, adding entries to this map will do nothing.
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
/// use maplit::{hashmap, hashset};
///
/// # fn main() {
/// let tags = hashset!["my-tag"];
/// let tag_attribute_values = hashmap![
///     "my-tag" => hashmap![
///         "my-attr" => hashset!["val"],
///     ],
/// ];
/// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
///     .clean("<my-tag my-attr=val>")
///     .to_string();
/// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
/// # }
/// ```
///
/// # Defaults
///
/// None.
pub fn tag_attribute_values(
    &mut self,
    value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
) -> &mut Self {
    self.tag_attribute_values = value;
    self
}
845 | |
846 | /// Add additonal whitelisted tag-specific attribute values without overwriting old ones. |
847 | /// |
848 | /// # Examples |
849 | /// |
850 | /// let a = ammonia::Builder::default() |
851 | /// .add_tags(&["my-tag"]) |
852 | /// .add_tag_attribute_values("my-tag", "my-attr", &[""]) |
853 | /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
854 | /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
855 | pub fn add_tag_attribute_values< |
856 | T: 'a + ?Sized + Borrow<str>, |
857 | U: 'a + ?Sized + Borrow<str>, |
858 | V: 'a + ?Sized + Borrow<str>, |
859 | I: IntoIter<Item = &'a T>, |
860 | >( |
861 | &mut self, |
862 | tag: &'a U, |
863 | attribute: &'a V, |
864 | it: I, |
865 | ) -> &mut Self { |
866 | self.tag_attribute_values |
867 | .entry(tag.borrow()) |
868 | .or_insert_with(HashMap::new) |
869 | .entry(attribute.borrow()) |
870 | .or_insert_with(HashSet::new) |
871 | .extend(it.into_iter().map(Borrow::borrow)); |
872 | |
873 | self |
874 | } |
875 | |
876 | /// Remove already-whitelisted tag-specific attribute values. |
877 | /// |
878 | /// Does nothing if the attribute or the value is already gone. |
879 | /// |
880 | /// # Examples |
881 | /// |
882 | /// let a = ammonia::Builder::default() |
883 | /// .rm_tag_attributes("a", &["href"]) |
884 | /// .add_tag_attribute_values("a", "href", &["/"]) |
885 | /// .rm_tag_attribute_values("a", "href", &["/"]) |
886 | /// .clean("<a href=\"/\"></a>").to_string(); |
887 | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
888 | pub fn rm_tag_attribute_values< |
889 | 'b, |
890 | 'c, |
891 | T: 'b + ?Sized + Borrow<str>, |
892 | U: 'c + ?Sized + Borrow<str>, |
893 | V: 'c + ?Sized + Borrow<str>, |
894 | I: IntoIter<Item = &'b T>, |
895 | >( |
896 | &mut self, |
897 | tag: &'c U, |
898 | attribute: &'c V, |
899 | it: I, |
900 | ) -> &mut Self { |
901 | if let Some(attrs) = self |
902 | .tag_attribute_values |
903 | .get_mut(tag.borrow()) |
904 | .and_then(|map| map.get_mut(attribute.borrow())) |
905 | { |
906 | for i in it { |
907 | attrs.remove(i.borrow()); |
908 | } |
909 | } |
910 | self |
911 | } |
912 | |
/// Returns a copy of the set of whitelisted tag-specific attribute values.
///
/// The result maps each tag name to a map from attribute name to the set of allowed values.
/// It is an independent clone: changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::{hashmap, hashset};
///
///     let attribute_values = hashmap![
///         "my-attr-1" => hashset!["foo"],
///         "my-attr-2" => hashset!["baz", "bar"],
///     ];
///     let tag_attribute_values = hashmap![
///         "my-tag" => attribute_values
///     ];
///
///     let mut b = ammonia::Builder::default();
///     b.tag_attribute_values(Clone::clone(&tag_attribute_values));
///     assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
pub fn clone_tag_attribute_values(
    &self,
) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
    self.tag_attribute_values.clone()
}
935 | |
/// Sets the values of HTML attributes that are to be set on specific tags.
///
/// The value is structured as a map from tag names to a map from attribute names to an
/// attribute value.
///
/// The given map replaces whatever was configured before; use `set_tag_attribute_value`
/// to add a single entry without discarding the others.
///
/// If a tag is not itself whitelisted, adding entries to this map will do nothing.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::{hashmap, hashset};
///
///     # fn main() {
///     let tags = hashset!["my-tag"];
///     let set_tag_attribute_values = hashmap![
///         "my-tag" => hashmap![
///             "my-attr" => "val",
///         ],
///     ];
///     let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
///         .clean("<my-tag>")
///         .to_string();
///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
///     # }
///
/// # Defaults
///
/// None.
pub fn set_tag_attribute_values(
    &mut self,
    value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
) -> &mut Self {
    self.set_tag_attribute_values = value;
    self
}
971 | |
972 | /// Add an attribute value to set on a specific element. |
973 | /// |
974 | /// # Examples |
975 | /// |
976 | /// let a = ammonia::Builder::default() |
977 | /// .add_tags(&["my-tag"]) |
978 | /// .set_tag_attribute_value("my-tag", "my-attr", "val") |
979 | /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
980 | /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a); |
981 | pub fn set_tag_attribute_value< |
982 | T: 'a + ?Sized + Borrow<str>, |
983 | A: 'a + ?Sized + Borrow<str>, |
984 | V: 'a + ?Sized + Borrow<str>, |
985 | >( |
986 | &mut self, |
987 | tag: &'a T, |
988 | attribute: &'a A, |
989 | value: &'a V, |
990 | ) -> &mut Self { |
991 | self.set_tag_attribute_values |
992 | .entry(tag.borrow()) |
993 | .or_insert_with(HashMap::new) |
994 | .insert(attribute.borrow(), value.borrow()); |
995 | self |
996 | } |
997 | |
998 | /// Remove existing tag-specific attribute values to be set. |
999 | /// |
1000 | /// Does nothing if the attribute is already gone. |
1001 | /// |
1002 | /// # Examples |
1003 | /// |
1004 | /// let a = ammonia::Builder::default() |
1005 | /// // this does nothing, since no value is set for this tag attribute yet |
1006 | /// .rm_set_tag_attribute_value("a", "target") |
1007 | /// .set_tag_attribute_value("a", "target", "_blank") |
1008 | /// .rm_set_tag_attribute_value("a", "target") |
1009 | /// .clean("<a href=\"/\"></a>").to_string(); |
1010 | /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a); |
1011 | pub fn rm_set_tag_attribute_value< |
1012 | T: 'a + ?Sized + Borrow<str>, |
1013 | A: 'a + ?Sized + Borrow<str>, |
1014 | >( |
1015 | &mut self, |
1016 | tag: &'a T, |
1017 | attribute: &'a A, |
1018 | ) -> &mut Self { |
1019 | if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) { |
1020 | attributes.remove(attribute.borrow()); |
1021 | } |
1022 | self |
1023 | } |
1024 | |
1025 | /// Returns the value that will be set for the attribute on the element, if any. |
1026 | /// |
1027 | /// # Examples |
1028 | /// |
1029 | /// let mut b = ammonia::Builder::default(); |
1030 | /// b.set_tag_attribute_value("a", "target", "_blank"); |
1031 | /// let value = b.get_set_tag_attribute_value("a", "target"); |
1032 | /// assert_eq!(value, Some("_blank")); |
1033 | pub fn get_set_tag_attribute_value< |
1034 | T: 'a + ?Sized + Borrow<str>, |
1035 | A: 'a + ?Sized + Borrow<str>, |
1036 | >( |
1037 | &self, |
1038 | tag: &'a T, |
1039 | attribute: &'a A, |
1040 | ) -> Option<&'a str> { |
1041 | self.set_tag_attribute_values |
1042 | .get(tag.borrow()) |
1043 | .and_then(|map| map.get(attribute.borrow())) |
1044 | .copied() |
1045 | } |
1046 | |
/// Returns a copy of the set of tag-specific attribute values to be set.
///
/// The result maps each tag name to a map from attribute name to the value that will be set.
/// It is an independent clone: changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::{hashmap, hashset};
///
///     let attribute_values = hashmap![
///         "my-attr-1" => "foo",
///         "my-attr-2" => "bar",
///     ];
///     let set_tag_attribute_values = hashmap![
///         "my-tag" => attribute_values,
///     ];
///
///     let mut b = ammonia::Builder::default();
///     b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
///     assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
    self.set_tag_attribute_values.clone()
}
1067 | |
/// Sets the attribute-name prefixes that are allowed on any tag.
///
/// The given set replaces any prefixes configured before; use
/// `add_generic_attribute_prefixes` to extend the existing set instead.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let prefixes = hashset!["data-"];
///     let a = Builder::new()
///         .generic_attribute_prefixes(prefixes)
///         .clean("<b data-val=1>")
///         .to_string();
///     assert_eq!(a, "<b data-val=\"1\"></b>");
///     # }
///
/// # Defaults
///
/// No attribute prefixes are allowed by default.
pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.generic_attribute_prefixes = Some(value);
    self
}
1091 | |
1092 | /// Add additional whitelisted attribute prefix without overwriting old ones. |
1093 | /// |
1094 | /// # Examples |
1095 | /// |
1096 | /// let a = ammonia::Builder::default() |
1097 | /// .add_generic_attribute_prefixes(&["my-"]) |
1098 | /// .clean("<span my-attr>mess</span>").to_string(); |
1099 | /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
1100 | pub fn add_generic_attribute_prefixes< |
1101 | T: 'a + ?Sized + Borrow<str>, |
1102 | I: IntoIter<Item = &'a T>, |
1103 | >( |
1104 | &mut self, |
1105 | it: I, |
1106 | ) -> &mut Self { |
1107 | self.generic_attribute_prefixes |
1108 | .get_or_insert_with(HashSet::new) |
1109 | .extend(it.into_iter().map(Borrow::borrow)); |
1110 | self |
1111 | } |
1112 | |
1113 | /// Remove already-whitelisted attribute prefixes. |
1114 | /// |
1115 | /// Does nothing if the attribute prefix is already gone. |
1116 | /// |
1117 | /// # Examples |
1118 | /// |
1119 | /// let a = ammonia::Builder::default() |
1120 | /// .add_generic_attribute_prefixes(&["data-", "code-"]) |
1121 | /// .rm_generic_attribute_prefixes(&["data-"]) |
1122 | /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string(); |
1123 | /// assert_eq!("<span code-test=\"foo\"></span>", a); |
1124 | pub fn rm_generic_attribute_prefixes< |
1125 | 'b, |
1126 | T: 'b + ?Sized + Borrow<str>, |
1127 | I: IntoIter<Item = &'b T>, |
1128 | >( |
1129 | &mut self, |
1130 | it: I, |
1131 | ) -> &mut Self { |
1132 | if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| { |
1133 | for i in it { |
1134 | let _ = prefixes.remove(i.borrow()); |
1135 | } |
1136 | prefixes.is_empty() |
1137 | }) { |
1138 | self.generic_attribute_prefixes = None; |
1139 | } |
1140 | self |
1141 | } |
1142 | |
/// Returns a copy of the set of whitelisted attribute prefixes.
///
/// The result is `None` when no prefixes are configured. It is an independent clone:
/// changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
///
///     let mut b = ammonia::Builder::default();
///     b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
///     assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
    self.generic_attribute_prefixes.clone()
}
1157 | |
/// Sets the attributes that are allowed on any tag.
///
/// The given set replaces the previous one; use `add_generic_attributes` to extend the
/// existing set instead.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let attributes = hashset!["data-val"];
///     let a = Builder::new()
///         .generic_attributes(attributes)
///         .clean("<b data-val=1>")
///         .to_string();
///     assert_eq!(a, "<b data-val=\"1\"></b>");
///     # }
///
/// # Defaults
///
/// ```notest
/// lang, title
/// ```
pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.generic_attributes = value;
    self
}
1183 | |
1184 | /// Add additonal whitelisted attributes without overwriting old ones. |
1185 | /// |
1186 | /// # Examples |
1187 | /// |
1188 | /// let a = ammonia::Builder::default() |
1189 | /// .add_generic_attributes(&["my-attr"]) |
1190 | /// .clean("<span my-attr>mess</span>").to_string(); |
1191 | /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
1192 | pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
1193 | &mut self, |
1194 | it: I, |
1195 | ) -> &mut Self { |
1196 | self.generic_attributes |
1197 | .extend(it.into_iter().map(Borrow::borrow)); |
1198 | self |
1199 | } |
1200 | |
1201 | /// Remove already-whitelisted attributes. |
1202 | /// |
1203 | /// Does nothing if the attribute is already gone. |
1204 | /// |
1205 | /// # Examples |
1206 | /// |
1207 | /// let a = ammonia::Builder::default() |
1208 | /// .rm_generic_attributes(&["title"]) |
1209 | /// .clean("<span title=\"cool\"></span>").to_string(); |
1210 | /// assert_eq!("<span></span>", a); |
1211 | pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
1212 | &mut self, |
1213 | it: I, |
1214 | ) -> &mut Self { |
1215 | for i in it { |
1216 | self.generic_attributes.remove(i.borrow()); |
1217 | } |
1218 | self |
1219 | } |
1220 | |
/// Returns a copy of the set of whitelisted attributes.
///
/// The result is an independent clone: changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
///
///     let mut b = ammonia::Builder::default();
///     b.generic_attributes(Clone::clone(&generic_attributes));
///     assert_eq!(generic_attributes, b.clone_generic_attributes());
pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
    self.generic_attributes.clone()
}
1235 | |
/// Sets the URL schemes permitted on `href` and `src` attributes.
///
/// The given set replaces the previous one; use `add_url_schemes` to extend the existing
/// set instead.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let url_schemes = hashset![
///         "http", "https", "mailto", "magnet"
///     ];
///     let a = Builder::new().url_schemes(url_schemes)
///         .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
///         .to_string();
///
///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
///     // in the cleaned HTML.
///     assert_eq!(a,
///         "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
///     # }
///
/// # Defaults
///
/// ```notest
/// bitcoin, ftp, ftps, geo, http, https, im, irc,
/// ircs, magnet, mailto, mms, mx, news, nntp,
/// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
/// webcal, wtai, xmpp
/// ```
pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    self.url_schemes = value;
    self
}
1269 | |
1270 | /// Add additonal whitelisted URL schemes without overwriting old ones. |
1271 | /// |
1272 | /// # Examples |
1273 | /// |
1274 | /// let a = ammonia::Builder::default() |
1275 | /// .add_url_schemes(&["my-scheme"]) |
1276 | /// .clean("<a href=my-scheme:home>mess</span>").to_string(); |
1277 | /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a); |
1278 | pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
1279 | &mut self, |
1280 | it: I, |
1281 | ) -> &mut Self { |
1282 | self.url_schemes.extend(it.into_iter().map(Borrow::borrow)); |
1283 | self |
1284 | } |
1285 | |
1286 | /// Remove already-whitelisted attributes. |
1287 | /// |
1288 | /// Does nothing if the attribute is already gone. |
1289 | /// |
1290 | /// # Examples |
1291 | /// |
1292 | /// let a = ammonia::Builder::default() |
1293 | /// .rm_url_schemes(&["ftp"]) |
1294 | /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string(); |
1295 | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
1296 | pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
1297 | &mut self, |
1298 | it: I, |
1299 | ) -> &mut Self { |
1300 | for i in it { |
1301 | self.url_schemes.remove(i.borrow()); |
1302 | } |
1303 | self |
1304 | } |
1305 | |
/// Returns a copy of the set of whitelisted URL schemes.
///
/// The result is an independent clone: changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::hashset;
///
///     let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
///
///     let mut b = ammonia::Builder::default();
///     b.url_schemes(Clone::clone(&url_schemes));
///     assert_eq!(url_schemes, b.clone_url_schemes());
pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
    self.url_schemes.clone()
}
1319 | |
/// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
///
/// The given policy replaces the one configured before.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///
///     let a = Builder::new().url_relative(UrlRelative::PassThrough)
///         .clean("<a href=/>Home</a>")
///         .to_string();
///
///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
///     // in the cleaned HTML.
///     assert_eq!(
///         a,
///         "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
///
/// # Defaults
///
/// ```notest
/// UrlRelative::PassThrough
/// ```
pub fn url_relative(&mut self, value: UrlRelative) -> &mut Self {
    self.url_relative = value;
    self
}
1345 | |
/// Allows rewriting of all attributes using a callback.
///
/// The callback takes the name of the element, the name of the attribute and the
/// attribute's value, in that order.
/// It returns `None` to remove the attribute, or `Some` with the value to use.
///
/// Rewriting of attributes with URLs is done before `url_relative()`.
///
/// # Panics
///
/// If more than one callback is set.
///
/// # Examples
///
/// ```rust
/// use ammonia::Builder;
/// let a = Builder::new()
///     .attribute_filter(|element, attribute, value| {
///         match (element, attribute) {
///             ("img", "src") => None,
///             _ => Some(value.into())
///         }
///     })
///     .link_rel(None)
///     .clean("<a href=/><img alt=Home src=foo></a>")
///     .to_string();
/// assert_eq!(a,
///     r#"<a href="/"><img alt="Home"></a>"#);
/// ```
pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
where
    CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
{
    // A second registration is treated as a programming error rather than silently
    // replacing the first callback.
    assert!(
        self.attribute_filter.is_none(),
        "attribute_filter can be set only once"
    );
    self.attribute_filter = Some(Box::new(callback));
    self
}
1385 | |
/// Returns `true` if the relative URL resolver is set to `Deny`.
///
/// Any other `UrlRelative` setting yields `false`.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Deny);
///     assert!(a.is_url_relative_deny());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(!a.is_url_relative_deny());
pub fn is_url_relative_deny(&self) -> bool {
    matches!(self.url_relative, UrlRelative::Deny)
}
1399 | |
/// Returns `true` if the relative URL resolver is set to `PassThrough`.
///
/// Any other `UrlRelative` setting yields `false`.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Deny);
///     assert!(!a.is_url_relative_pass_through());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(a.is_url_relative_pass_through());
pub fn is_url_relative_pass_through(&self) -> bool {
    matches!(self.url_relative, UrlRelative::PassThrough)
}
1413 | |
/// Returns `true` if the relative URL resolver is set to `Custom`.
///
/// Only the variant is checked; the callback stored inside `Custom` is not inspected.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     use std::borrow::Cow;
///     fn test(a: &str) -> Option<Cow<str>> { None }
///     # fn main() {
///     let mut a = Builder::default();
///     a.url_relative(UrlRelative::Custom(Box::new(test)));
///     assert!(a.is_url_relative_custom());
///     a.url_relative(UrlRelative::PassThrough);
///     assert!(!a.is_url_relative_custom());
///     a.url_relative(UrlRelative::Deny);
///     assert!(!a.is_url_relative_custom());
///     # }
pub fn is_url_relative_custom(&self) -> bool {
    matches!(self.url_relative, UrlRelative::Custom(_))
}
1433 | |
/// Configures a `rel` attribute that will be added on links.
///
/// If `rel` is in the generic attributes or in the tag attributes of `a`, this must be
/// set to `None`; otherwise cleaning a document panics.
/// Common `rel` values to include:
///
/// * `noopener`: This prevents [a particular type of XSS attack],
///   and should usually be turned on for untrusted HTML.
/// * `noreferrer`: This prevents the browser from [sending the source URL]
///   to the website that is linked to.
/// * `nofollow`: This prevents search engines from [using this link for
///   ranking], which disincentivizes spammers.
///
/// To turn on rel-insertion, call this function with a space-separated list.
/// Ammonia does not parse rel-attributes;
/// it just puts the given string into the attribute directly.
///
/// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
/// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
/// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let a = Builder::new().link_rel(None)
///         .clean("<a href=https://rust-lang.org/>Rust</a>")
///         .to_string();
///     assert_eq!(
///         a,
///         "<a href=\"https://rust-lang.org/\">Rust</a>");
///
/// # Defaults
///
/// ```notest
/// Some("noopener noreferrer")
/// ```
pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
    self.link_rel = value;
    self
}
1474 | |
/// Returns the settings for links' `rel` attribute, if one is set.
///
/// `None` means that no `rel` attribute is configured.
///
/// # Examples
///
///     use ammonia::{Builder, UrlRelative};
///     let mut a = Builder::default();
///     a.link_rel(Some("a b"));
///     assert_eq!(a.get_link_rel(), Some("a b"));
pub fn get_link_rel(&self) -> Option<&str> {
    self.link_rel
}
1486 | |
/// Sets the CSS classes that are allowed on specific tags.
///
/// The value is structured as a map from tag names to a set of class names.
/// It replaces whatever was configured before; use `add_allowed_classes` to extend the
/// existing configuration instead.
///
/// If the `class` attribute is itself whitelisted for a tag, then adding entries to
/// this map will cause a panic.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::{hashmap, hashset};
///
///     # fn main() {
///     let allowed_classes = hashmap![
///         "code" => hashset!["rs", "ex", "c", "cxx", "js"]
///     ];
///     let a = Builder::new()
///         .allowed_classes(allowed_classes)
///         .clean("<code class=rs>fn main() {}</code>")
///         .to_string();
///     assert_eq!(
///         a,
///         "<code class=\"rs\">fn main() {}</code>");
///     # }
///
/// # Defaults
///
/// The set of allowed classes is empty by default.
pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
    self.allowed_classes = value;
    self
}
1519 | |
1520 | /// Add additonal whitelisted classes without overwriting old ones. |
1521 | /// |
1522 | /// # Examples |
1523 | /// |
1524 | /// let a = ammonia::Builder::default() |
1525 | /// .add_allowed_classes("a", &["onebox"]) |
1526 | /// .clean("<a href=/ class=onebox>mess</span>").to_string(); |
1527 | /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a); |
1528 | pub fn add_allowed_classes< |
1529 | T: 'a + ?Sized + Borrow<str>, |
1530 | U: 'a + ?Sized + Borrow<str>, |
1531 | I: IntoIter<Item = &'a T>, |
1532 | >( |
1533 | &mut self, |
1534 | tag: &'a U, |
1535 | it: I, |
1536 | ) -> &mut Self { |
1537 | self.allowed_classes |
1538 | .entry(tag.borrow()) |
1539 | .or_insert_with(HashSet::new) |
1540 | .extend(it.into_iter().map(Borrow::borrow)); |
1541 | self |
1542 | } |
1543 | |
1544 | /// Remove already-whitelisted attributes. |
1545 | /// |
1546 | /// Does nothing if the attribute is already gone. |
1547 | /// |
1548 | /// # Examples |
1549 | /// |
1550 | /// let a = ammonia::Builder::default() |
1551 | /// .add_allowed_classes("span", &["active"]) |
1552 | /// .rm_allowed_classes("span", &["active"]) |
1553 | /// .clean("<span class=active>").to_string(); |
1554 | /// assert_eq!("<span class=\"\"></span>", a); |
1555 | pub fn rm_allowed_classes< |
1556 | 'b, |
1557 | 'c, |
1558 | T: 'b + ?Sized + Borrow<str>, |
1559 | U: 'c + ?Sized + Borrow<str>, |
1560 | I: IntoIter<Item = &'b T>, |
1561 | >( |
1562 | &mut self, |
1563 | tag: &'c U, |
1564 | it: I, |
1565 | ) -> &mut Self { |
1566 | if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) { |
1567 | for i in it { |
1568 | tag.remove(i.borrow()); |
1569 | } |
1570 | } |
1571 | self |
1572 | } |
1573 | |
/// Returns a copy of the set of whitelisted class attributes.
///
/// The result maps each tag name to the set of class names allowed on it.
/// It is an independent clone: changing it does not affect this builder.
///
/// # Examples
///
///     use maplit::{hashmap, hashset};
///
///     let allowed_classes = hashmap![
///         "my-tag" => hashset!["my-class-1", "my-class-2"]
///     ];
///
///     let mut b = ammonia::Builder::default();
///     b.allowed_classes(Clone::clone(&allowed_classes));
///     assert_eq!(allowed_classes, b.clone_allowed_classes());
pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    self.allowed_classes.clone()
}
1590 | |
/// Configures the handling of HTML comments.
///
/// If this option is `true`, comments are stripped from the output.
/// If this option is `false`, comments will be preserved.
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let a = Builder::new().strip_comments(false)
///         .clean("<!-- yes -->")
///         .to_string();
///     assert_eq!(
///         a,
///         "<!-- yes -->");
///
/// # Defaults
///
/// `true`
pub fn strip_comments(&mut self, value: bool) -> &mut Self {
    self.strip_comments = value;
    self
}
1613 | |
/// Returns `true` if comment stripping is turned on.
///
/// This reports the value last passed to `strip_comments`.
///
/// # Examples
///
///     let mut a = ammonia::Builder::new();
///     a.strip_comments(true);
///     assert!(a.will_strip_comments());
///     a.strip_comments(false);
///     assert!(!a.will_strip_comments());
pub fn will_strip_comments(&self) -> bool {
    self.strip_comments
}
1626 | |
/// Prefixes all "id" attribute values with a given string. Note that the tag and
/// attribute themselves must still be whitelisted.
///
/// Passing `None` turns the prefixing off.
///
/// # Examples
///
///     use ammonia::Builder;
///     use maplit::hashset;
///
///     # fn main() {
///     let attributes = hashset!["id"];
///     let a = Builder::new()
///         .generic_attributes(attributes)
///         .id_prefix(Some("safe-"))
///         .clean("<b id=42>")
///         .to_string();
///     assert_eq!(a, "<b id=\"safe-42\"></b>");
///     # }
///
/// # Defaults
///
/// `None`
pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
    self.id_prefix = value;
    self
}
1653 | |
/// Constructs a [`Builder`] instance configured with the [default options].
///
/// This is the same as calling `Builder::default()`.
///
/// # Examples
///
///     use ammonia::{Builder, Url, UrlRelative};
///     # use std::error::Error;
///
///     # fn do_main() -> Result<(), Box<dyn Error>> {
///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
///
///     let result = Builder::new() // <--
///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
///         .clean(input)
///         .to_string();
///     assert_eq!(result, output);
///     # Ok(())
///     # }
///     # fn main() { do_main().unwrap() }
///
/// [default options]: fn.clean.html
/// [`Builder`]: struct.Builder.html
pub fn new() -> Self {
    Self::default()
}
1679 | |
/// Constructs a [`Builder`] instance configured with no allowed tags.
///
/// Every other option keeps its default value; only the tag whitelist is emptied.
///
/// # Examples
///
///     use ammonia::{Builder, Url, UrlRelative};
///     # use std::error::Error;
///
///     # fn do_main() -> Result<(), Box<dyn Error>> {
///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
///     let output = "This is an Ammonia example using the empty() function.";
///
///     let result = Builder::empty() // <--
///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
///         .clean(input)
///         .to_string();
///     assert_eq!(result, output);
///     # Ok(())
///     # }
///     # fn main() { do_main().unwrap() }
///
/// [default options]: fn.clean.html
/// [`Builder`]: struct.Builder.html
pub fn empty() -> Self {
    Self {
        // Start from the defaults and override only the tag whitelist.
        tags: hashset![],
        ..Self::default()
    }
}
1708 | |
1709 | /// Sanitizes an HTML fragment in a string according to the configured options. |
1710 | /// |
1711 | /// # Examples |
1712 | /// |
1713 | /// use ammonia::{Builder, Url, UrlRelative}; |
1714 | /// # use std::error::Error; |
1715 | /// |
1716 | /// # fn do_main() -> Result<(), Box<Error>> { |
1717 | /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; |
1718 | /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; |
1719 | /// |
1720 | /// let result = Builder::new() |
1721 | /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
1722 | /// .clean(input) |
1723 | /// .to_string(); // <-- |
1724 | /// assert_eq!(result, output); |
1725 | /// # Ok(()) |
1726 | /// # } |
1727 | /// # fn main() { do_main().unwrap() } |
1728 | pub fn clean(&self, src: &str) -> Document { |
1729 | let parser = Self::make_parser(); |
1730 | let dom = parser.one(src); |
1731 | self.clean_dom(dom) |
1732 | } |
1733 | |
1734 | /// Sanitizes an HTML fragment from a reader according to the configured options. |
1735 | /// |
1736 | /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just |
1737 | /// like when using [`String::from_utf8_lossy`]. |
1738 | /// |
1739 | /// To avoid consuming the reader, a mutable reference can be passed to this method. |
1740 | /// |
1741 | /// # Examples |
1742 | /// |
1743 | /// use ammonia::Builder; |
1744 | /// # use std::error::Error; |
1745 | /// |
1746 | /// # fn do_main() -> Result<(), Box<Error>> { |
1747 | /// let a = Builder::new() |
1748 | /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b` |
1749 | /// .to_string(); |
1750 | /// assert_eq!(a, ""); |
1751 | /// # Ok(()) } |
1752 | /// # fn main() { do_main().unwrap() } |
1753 | /// |
1754 | /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy |
1755 | pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document> |
1756 | where |
1757 | R: io::Read, |
1758 | { |
1759 | let parser = Self::make_parser().from_utf8(); |
1760 | let dom = parser.read_from(&mut src)?; |
1761 | Ok(self.clean_dom(dom)) |
1762 | } |
1763 | |
1764 | /// Clean a post-parsing DOM. |
1765 | /// |
1766 | /// This is not a public API because RcDom isn't really stable. |
1767 | /// We want to be able to take breaking changes to html5ever itself |
1768 | /// without having to break Ammonia's API. |
1769 | fn clean_dom(&self, mut dom: RcDom) -> Document { |
1770 | let mut stack = Vec::new(); |
1771 | let mut removed = Vec::new(); |
1772 | let link_rel = self |
1773 | .link_rel |
1774 | .map(|link_rel| format_tendril!(" {}" , link_rel)); |
1775 | if link_rel.is_some() { |
1776 | assert!(self.generic_attributes.get("rel" ).is_none()); |
1777 | assert!(self |
1778 | .tag_attributes |
1779 | .get("a" ) |
1780 | .and_then(|a| a.get("rel" )) |
1781 | .is_none()); |
1782 | } |
1783 | assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class" )); |
1784 | for tag_name in self.allowed_classes.keys() { |
1785 | assert!(self |
1786 | .tag_attributes |
1787 | .get(tag_name) |
1788 | .and_then(|a| a.get("class" )) |
1789 | .is_none()); |
1790 | } |
1791 | for tag_name in &self.clean_content_tags { |
1792 | assert!(!self.tags.contains(tag_name)); |
1793 | assert!(!self.tag_attributes.contains_key(tag_name)); |
1794 | } |
1795 | let body = { |
1796 | let children = dom.document.children.borrow(); |
1797 | children[0].clone() |
1798 | }; |
1799 | stack.extend( |
1800 | mem::take(&mut *body.children.borrow_mut()) |
1801 | .into_iter() |
1802 | .rev(), |
1803 | ); |
1804 | // This design approach is used to prevent pathological content from producing |
1805 | // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`, |
1806 | // of course, contains nodes that need to be dropped (we can't just drop them, |
1807 | // because they could have a very deep child tree). |
1808 | while let Some(mut node) = stack.pop() { |
1809 | let parent = node.parent |
1810 | .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed" ) |
1811 | .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped" ); |
1812 | if self.clean_node_content(&node) { |
1813 | removed.push(node); |
1814 | continue; |
1815 | } |
1816 | let pass_clean = self.clean_child(&mut node); |
1817 | let pass = pass_clean && self.check_expected_namespace(&parent, &node); |
1818 | if pass { |
1819 | self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix); |
1820 | dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone())); |
1821 | } else { |
1822 | for sub in node.children.borrow_mut().iter_mut() { |
1823 | sub.parent.replace(Some(Rc::downgrade(&parent))); |
1824 | } |
1825 | } |
1826 | stack.extend( |
1827 | mem::take(&mut *node.children.borrow_mut()) |
1828 | .into_iter() |
1829 | .rev(), |
1830 | ); |
1831 | if !pass { |
1832 | removed.push(node); |
1833 | } |
1834 | } |
1835 | // Now, imperatively clean up all of the child nodes. |
1836 | // Otherwise, we could wind up with a DoS, either caused by a memory leak, |
1837 | // or caused by a stack overflow. |
1838 | while let Some(node) = removed.pop() { |
1839 | removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]); |
1840 | } |
1841 | Document(dom) |
1842 | } |
1843 | |
1844 | /// Returns `true` if a node and all its content should be removed. |
1845 | fn clean_node_content(&self, node: &Handle) -> bool { |
1846 | match node.data { |
1847 | NodeData::Text { .. } |
1848 | | NodeData::Comment { .. } |
1849 | | NodeData::Doctype { .. } |
1850 | | NodeData::Document |
1851 | | NodeData::ProcessingInstruction { .. } => false, |
1852 | NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local), |
1853 | } |
1854 | } |
1855 | |
1856 | /// Remove unwanted attributes, and check if the node should be kept or not. |
1857 | /// |
1858 | /// The root node doesn't need cleaning because we create the root node ourselves, |
1859 | /// and it doesn't get serialized, and ... it just exists to give the parser |
1860 | /// a context (in this case, a div-like block context). |
1861 | fn clean_child(&self, child: &mut Handle) -> bool { |
1862 | match child.data { |
1863 | NodeData::Text { .. } => true, |
1864 | NodeData::Comment { .. } => !self.strip_comments, |
1865 | NodeData::Doctype { .. } |
1866 | | NodeData::Document |
1867 | | NodeData::ProcessingInstruction { .. } => false, |
1868 | NodeData::Element { |
1869 | ref name, |
1870 | ref attrs, |
1871 | .. |
1872 | } => { |
1873 | if self.tags.contains(&*name.local) { |
1874 | let attr_filter = |attr: &html5ever::Attribute| { |
1875 | let whitelisted = self.generic_attributes.contains(&*attr.name.local) |
1876 | || self.generic_attribute_prefixes.as_ref().map(|prefixes| { |
1877 | prefixes.iter().any(|&p| attr.name.local.starts_with(p)) |
1878 | }) == Some(true) |
1879 | || self |
1880 | .tag_attributes |
1881 | .get(&*name.local) |
1882 | .map(|ta| ta.contains(&*attr.name.local)) |
1883 | == Some(true) |
1884 | || self |
1885 | .tag_attribute_values |
1886 | .get(&*name.local) |
1887 | .and_then(|tav| tav.get(&*attr.name.local)) |
1888 | .map(|vs| { |
1889 | let attr_val = attr.value.to_lowercase(); |
1890 | vs.iter().any(|v| v.to_lowercase() == attr_val) |
1891 | }) |
1892 | == Some(true); |
1893 | if !whitelisted { |
1894 | // If the class attribute is not whitelisted, |
1895 | // but there is a whitelisted set of allowed_classes, |
1896 | // do not strip out the class attribute. |
1897 | // Banned classes will be filtered later. |
1898 | &*attr.name.local == "class" |
1899 | && self.allowed_classes.contains_key(&*name.local) |
1900 | } else if is_url_attr(&*name.local, &*attr.name.local) { |
1901 | let url = Url::parse(&*attr.value); |
1902 | if let Ok(url) = url { |
1903 | self.url_schemes.contains(url.scheme()) |
1904 | } else if url == Err(url::ParseError::RelativeUrlWithoutBase) { |
1905 | !matches!(self.url_relative, UrlRelative::Deny) |
1906 | } else { |
1907 | false |
1908 | } |
1909 | } else { |
1910 | true |
1911 | } |
1912 | }; |
1913 | attrs.borrow_mut().retain(attr_filter); |
1914 | true |
1915 | } else { |
1916 | false |
1917 | } |
1918 | } |
1919 | } |
1920 | } |
1921 | |
1922 | // Check for unexpected namespace changes. |
1923 | // |
1924 | // The issue happens if developers added to the list of allowed tags any |
1925 | // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state, |
1926 | // that is: |
1927 | // |
1928 | // * title |
1929 | // * textarea |
1930 | // * xmp |
1931 | // * iframe |
1932 | // * noembed |
1933 | // * noframes |
1934 | // * plaintext |
1935 | // * noscript |
1936 | // * style |
1937 | // * script |
1938 | // |
// An example in the wild is Plume, which allows iframe [1]. The examples
// below assume the following policy:
1941 | // |
1942 | // Builder::new() |
1943 | // .add_tags(&["iframe"]) |
1944 | // |
1945 | // In HTML namespace `<iframe>` is parsed specially; that is, its content is |
1946 | // treated as text. For instance, the following html: |
1947 | // |
1948 | // <iframe><a>test |
1949 | // |
1950 | // Is parsed into the following DOM tree: |
1951 | // |
1952 | // iframe |
1953 | // └─ #text: <a>test |
1954 | // |
1955 | // So iframe cannot have any children other than a text node. |
1956 | // |
1957 | // The same is not true, though, in "foreign content"; that is, within |
1958 | // <svg> or <math> tags. The following html: |
1959 | // |
1960 | // <svg><iframe><a>test |
1961 | // |
1962 | // is parsed differently: |
1963 | // |
1964 | // svg |
1965 | // └─ iframe |
1966 | // └─ a |
1967 | // └─ #text: test |
1968 | // |
1969 | // So in SVG namespace iframe can have children. |
1970 | // |
1971 | // Ammonia disallows <svg> but it keeps its content after deleting it. And |
1972 | // the parser internally keeps track of the namespace of the element. So |
1973 | // assume we have the following snippet: |
1974 | // |
1975 | // <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test |
1976 | // |
1977 | // It is parsed into: |
1978 | // |
1979 | // svg |
1980 | // └─ iframe |
1981 | // └─ a title="</iframe><img src onerror=alert(1)>" |
1982 | // └─ #text: test |
1983 | // |
1984 | // This DOM tree is harmless from ammonia point of view because the piece |
1985 | // of code that looks like XSS is in a title attribute. Hence, the |
1986 | // resulting "safe" HTML from ammonia would be: |
1987 | // |
1988 | // <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener |
1989 | // noreferrer">test</a></iframe> |
1990 | // |
1991 | // However, at this point, the information about namespace is lost, which |
1992 | // means that the browser will parse this snippet into: |
1993 | // |
1994 | // ├─ iframe |
1995 | // │ └─ #text: <a title=" |
1996 | // ├─ img src="" onerror="alert(1)" |
1997 | // └─ #text: " rel="noopener noreferrer">test |
1998 | // |
1999 | // Leading to XSS. |
2000 | // |
2001 | // To solve this issue, check for unexpected namespace switches after cleanup. |
2002 | // Elements which change namespace at an unexpected point are removed. |
2003 | // This function returns `true` if `child` should be kept, and `false` if it |
2004 | // should be removed. |
2005 | // |
2006 | // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21 |
2007 | fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool { |
2008 | let (parent, child) = match (&parent.data, &child.data) { |
2009 | (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn), |
2010 | _ => return true, |
2011 | }; |
2012 | // The only way to switch from html to svg is with the <svg> tag |
2013 | if parent.ns == ns!(html) && child.ns == ns!(svg) { |
2014 | child.local == local_name!("svg" ) |
2015 | // The only way to switch from html to mathml is with the <math> tag |
2016 | } else if parent.ns == ns!(html) && child.ns == ns!(mathml) { |
2017 | child.local == local_name!("math" ) |
2018 | // The only way to switch from mathml to svg/html is with a text integration point |
2019 | } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) { |
2020 | // https://html.spec.whatwg.org/#mathml |
2021 | matches!( |
2022 | &*parent.local, |
2023 | "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml" |
2024 | ) |
2025 | // The only way to switch from svg to mathml/html is with an html integration point |
2026 | } else if parent.ns == ns!(svg) && child.ns != ns!(svg) { |
2027 | // https://html.spec.whatwg.org/#svg-0 |
2028 | matches!(&*parent.local, "foreignObject" ) |
2029 | } else if child.ns == ns!(svg) { |
2030 | is_svg_tag(&*child.local) |
2031 | } else if child.ns == ns!(mathml) { |
2032 | is_mathml_tag(&*child.local) |
2033 | } else if child.ns == ns!(html) { |
2034 | (!is_svg_tag(&*child.local) && !is_mathml_tag(&*child.local)) |
2035 | || matches!( |
2036 | &*child.local, |
2037 | "title" | "style" | "font" | "a" | "script" | "span" |
2038 | ) |
2039 | } else { |
2040 | // There are no other supported ways to switch namespace |
2041 | parent.ns == child.ns |
2042 | } |
2043 | } |
2044 | |
2045 | /// Add and transform special-cased attributes and elements. |
2046 | /// |
2047 | /// This function handles: |
2048 | /// |
2049 | /// * relative URL rewriting |
2050 | /// * adding `<a rel>` attributes |
2051 | /// * filtering out banned classes |
2052 | fn adjust_node_attributes( |
2053 | &self, |
2054 | child: &mut Handle, |
2055 | link_rel: &Option<StrTendril>, |
2056 | id_prefix: Option<&'a str>, |
2057 | ) { |
2058 | if let NodeData::Element { |
2059 | ref name, |
2060 | ref attrs, |
2061 | .. |
2062 | } = child.data |
2063 | { |
2064 | if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) { |
2065 | let mut attrs = attrs.borrow_mut(); |
2066 | for (&set_name, &set_value) in set_attrs { |
2067 | // set the value of the attribute if the attribute is already present |
2068 | if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name) |
2069 | { |
2070 | if &*attr.value != set_value { |
2071 | attr.value = set_value.into(); |
2072 | } |
2073 | } else { |
2074 | // otherwise, add the attribute |
2075 | let attr = Attribute { |
2076 | name: QualName::new(None, ns!(), set_name.into()), |
2077 | value: set_value.into(), |
2078 | }; |
2079 | attrs.push(attr); |
2080 | } |
2081 | } |
2082 | } |
2083 | if let Some(ref link_rel) = *link_rel { |
2084 | if &*name.local == "a" { |
2085 | attrs.borrow_mut().push(Attribute { |
2086 | name: QualName::new(None, ns!(), local_name!("rel" )), |
2087 | value: link_rel.clone(), |
2088 | }) |
2089 | } |
2090 | } |
2091 | if let Some(ref id_prefix) = id_prefix { |
2092 | for attr in &mut *attrs.borrow_mut() { |
2093 | if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) { |
2094 | attr.value = format_tendril!(" {}{}" , id_prefix, attr.value); |
2095 | } |
2096 | } |
2097 | } |
2098 | if let Some(ref attr_filter) = self.attribute_filter { |
2099 | let mut drop_attrs = Vec::new(); |
2100 | let mut attrs = attrs.borrow_mut(); |
2101 | for (i, attr) in &mut attrs.iter_mut().enumerate() { |
2102 | let replace_with = if let Some(new) = |
2103 | attr_filter.filter(&*name.local, &*attr.name.local, &*attr.value) |
2104 | { |
2105 | if *new != *attr.value { |
2106 | Some(format_tendril!(" {}" , new)) |
2107 | } else { |
2108 | None // no need to replace the attr if filter returned the same value |
2109 | } |
2110 | } else { |
2111 | drop_attrs.push(i); |
2112 | None |
2113 | }; |
2114 | if let Some(replace_with) = replace_with { |
2115 | attr.value = replace_with; |
2116 | } |
2117 | } |
2118 | for i in drop_attrs.into_iter().rev() { |
2119 | attrs.swap_remove(i); |
2120 | } |
2121 | } |
2122 | { |
2123 | let mut drop_attrs = Vec::new(); |
2124 | let mut attrs = attrs.borrow_mut(); |
2125 | for (i, attr) in attrs.iter_mut().enumerate() { |
2126 | if is_url_attr(&*name.local, &*attr.name.local) && is_url_relative(&*attr.value) |
2127 | { |
2128 | let new_value = self.url_relative.evaluate(&*attr.value); |
2129 | if let Some(new_value) = new_value { |
2130 | attr.value = new_value; |
2131 | } else { |
2132 | drop_attrs.push(i); |
2133 | } |
2134 | } |
2135 | } |
2136 | // Swap remove scrambles the vector after the current point. |
2137 | // We will not do anything except with items before the current point. |
2138 | // The `rev()` is, as such, necessary for correctness. |
2139 | // We could use regular `remove(usize)` and a forward iterator, |
2140 | // but that's slower. |
2141 | for i in drop_attrs.into_iter().rev() { |
2142 | attrs.swap_remove(i); |
2143 | } |
2144 | } |
2145 | if let Some(allowed_values) = self.allowed_classes.get(&*name.local) { |
2146 | for attr in &mut *attrs.borrow_mut() { |
2147 | if &attr.name.local == "class" { |
2148 | let mut classes = vec![]; |
2149 | // https://html.spec.whatwg.org/#global-attributes:classes-2 |
2150 | for class in attr.value.split_ascii_whitespace() { |
2151 | if allowed_values.contains(class) { |
2152 | classes.push(class.to_owned()); |
2153 | } |
2154 | } |
2155 | attr.value = format_tendril!(" {}" , classes.join(" " )); |
2156 | } |
2157 | } |
2158 | } |
2159 | } |
2160 | } |
2161 | |
2162 | /// Initializes an HTML fragment parser. |
2163 | /// |
2164 | /// Ammonia conforms to the HTML5 fragment parsing rules, |
2165 | /// by parsing the given fragment as if it were included in a <div> tag. |
2166 | fn make_parser() -> html::Parser<RcDom> { |
2167 | html::parse_fragment( |
2168 | RcDom::default(), |
2169 | html::ParseOpts::default(), |
2170 | QualName::new(None, ns!(html), local_name!("div" )), |
2171 | vec![], |
2172 | ) |
2173 | } |
2174 | } |
2175 | |
/// Tells whether attribute `attr` on element `element` holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only count
/// on the specific elements listed below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match (element, attr) {
        (_, "href") | (_, "src") => true,
        ("form", "action") => true,
        ("object", "data") => true,
        ("button", "formaction") | ("input", "formaction") => true,
        ("a", "ping") => true,
        ("video", "poster") => true,
        _ => false,
    }
}
2186 | |
/// Tells whether `element` is the (case-sensitive) name of an SVG element.
///
/// The names come from the SVG 2 element index:
/// <https://svgwg.org/svg2-draft/eltindex.html>
fn is_svg_tag(element: &str) -> bool {
    match element {
        "a" | "animate" | "animateMotion" | "animateTransform" => true,
        "circle" | "clipPath" => true,
        "defs" | "desc" | "discard" => true,
        "ellipse" => true,
        // filter primitives
        "feBlend" | "feColorMatrix" | "feComponentTransfer" | "feComposite"
        | "feConvolveMatrix" | "feDiffuseLighting" | "feDisplacementMap"
        | "feDistantLight" | "feDropShadow" | "feFlood" | "feFuncA" | "feFuncB"
        | "feFuncG" | "feFuncR" | "feGaussianBlur" | "feImage" | "feMerge"
        | "feMergeNode" | "feMorphology" | "feOffset" | "fePointLight"
        | "feSpecularLighting" | "feSpotLight" | "feTile" | "feTurbulence" => true,
        "filter" | "foreignObject" => true,
        "g" => true,
        "image" => true,
        "line" | "linearGradient" => true,
        "marker" | "mask" | "metadata" | "mpath" => true,
        "path" | "pattern" | "polygon" | "polyline" => true,
        "radialGradient" | "rect" => true,
        "script" | "set" | "stop" | "style" | "svg" | "switch" | "symbol" => true,
        "text" | "textPath" | "title" | "tspan" => true,
        "use" => true,
        "view" => true,
        _ => false,
    }
}
2258 | |
/// Tells whether `element` is the (case-sensitive) name of a MathML element.
///
/// NOTE(review): the list is presumably taken from the MathML element index; the
/// comment that used to sit here pointed at the SVG index instead — confirm the source.
fn is_mathml_tag(element: &str) -> bool {
    match element {
        // a
        "abs" | "and" | "annotation" | "annotation-xml" | "apply" | "approx" | "arccos"
        | "arccosh" | "arccot" | "arccoth" | "arccsc" | "arccsch" | "arcsec" | "arcsech"
        | "arcsin" | "arcsinh" | "arctan" | "arctanh" | "arg" => true,
        // b
        "bind" | "bvar" => true,
        // c
        "card" | "cartesianproduct" | "cbytes" | "ceiling" | "cerror" | "ci" | "cn"
        | "codomain" | "complexes" | "compose" | "condition" | "conjugate" | "cos"
        | "cosh" | "cot" | "coth" | "cs" | "csc" | "csch" | "csymbol" | "curl" => true,
        // d
        "declare" | "degree" | "determinant" | "diff" | "divergence" | "divide"
        | "domain" | "domainofapplication" => true,
        // e
        "emptyset" | "eq" | "equivalent" | "eulergamma" | "exists" | "exp"
        | "exponentiale" => true,
        // f
        "factorial" | "factorof" | "false" | "floor" | "fn" | "forall" => true,
        // g
        "gcd" | "geq" | "grad" | "gt" => true,
        // i
        "ident" | "image" | "imaginary" | "imaginaryi" | "implies" | "in" | "infinity"
        | "int" | "integers" | "intersect" | "interval" | "inverse" => true,
        // l
        "lambda" | "laplacian" | "lcm" | "leq" | "limit" | "list" | "ln" | "log"
        | "logbase" | "lowlimit" | "lt" => true,
        // m
        "maction" | "maligngroup" | "malignmark" | "math" | "matrix" | "matrixrow"
        | "max" | "mean" | "median" | "menclose" | "merror" | "mfenced" | "mfrac"
        | "mglyph" | "mi" | "min" | "minus" | "mlabeledtr" | "mlongdiv"
        | "mmultiscripts" | "mn" | "mo" | "mode" | "moment" | "momentabout" | "mover"
        | "mpadded" | "mphantom" | "mprescripts" | "mroot" | "mrow" | "ms"
        | "mscarries" | "mscarry" | "msgroup" | "msline" | "mspace" | "msqrt" | "msrow"
        | "mstack" | "mstyle" | "msub" | "msubsup" | "msup" | "mtable" | "mtd"
        | "mtext" | "mtr" | "munder" | "munderover" => true,
        // n
        "naturalnumbers" | "neq" | "none" | "not" | "notanumber" | "notin"
        | "notprsubset" | "notsubset" => true,
        // o
        "or" | "otherwise" | "outerproduct" => true,
        // p
        "partialdiff" | "pi" | "piece" | "piecewise" | "plus" | "power" | "primes"
        | "product" | "prsubset" => true,
        // q
        "quotient" => true,
        // r
        "rationals" | "real" | "reals" | "reln" | "rem" | "root" => true,
        // s
        "scalarproduct" | "sdev" | "sec" | "sech" | "selector" | "semantics" | "sep"
        | "set" | "setdiff" | "share" | "sin" | "sinh" | "span" | "subset" | "sum" => true,
        // t
        "tan" | "tanh" | "tendsto" | "times" | "transpose" | "true" => true,
        // u
        "union" | "uplimit" => true,
        // v
        "variance" | "vector" | "vectorproduct" => true,
        // x
        "xor" => true,
        _ => false,
    }
}
2459 | |
2460 | fn is_url_relative(url: &str) -> bool { |
2461 | matches!( |
2462 | Url::parse(url), |
2463 | Err(url::ParseError::RelativeUrlWithoutBase) |
2464 | ) |
2465 | } |
2466 | |
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `data` attribute of an `object` tag, the `action` attribute of a
/// `form` tag, the `formaction` attribute of a `button` or `input` tag, the `ping`
/// attribute of an `a` tag and the `poster` attribute of a `video` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
/// ```
/// use std::borrow::Cow;
/// fn is_absolute_path(url: &str) -> bool {
///     let u = url.as_bytes();
///     // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///     // `/a/b/c` is an absolute path, and what we want to do stuff to.
///     u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
/// }
/// fn evaluate(url: &str) -> Option<Cow<str>> {
///     if is_absolute_path(url) {
///         Some(Cow::Owned(String::from("/root") + url))
///     } else {
///         Some(Cow::Borrowed(url))
///     }
/// }
/// fn main() {
///     let a = ammonia::Builder::new()
///         .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///         .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///         .to_string();
///     assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
/// }
/// ```
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// # Examples
    ///
    /// <table>
    /// <thead>
    /// <tr>
    /// <th>root</th>
    /// <th>path</th>
    /// <th>url</th>
    /// <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    /// <td>README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// <td></td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td></td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/</td>
    /// <td>https://github.com/</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>CONTRIBUTING.md</td>
    /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    /// <td>https://github.com/</td>
    /// <td>rust-ammonia/ammonia/blob/master/README.md</td>
    /// <td>/CONTRIBUTING.md</td>
    /// <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    Custom(Box<dyn UrlRelativeEvaluate>),
}
2630 | |
2631 | impl UrlRelative { |
2632 | fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> { |
2633 | match self { |
2634 | UrlRelative::RewriteWithBase(ref url_base) => { |
2635 | url_base.join(url).ok().and_then(|x| StrTendril::from_str(x.as_str()).ok()) |
2636 | } |
2637 | UrlRelative::RewriteWithRoot { ref root, ref path } => { |
2638 | (match url.as_bytes() { |
2639 | // Scheme-relative URL |
2640 | [b'/' , b'/' , ..] => root.join(url), |
2641 | // Path-absolute URL |
2642 | b"/" => root.join("." ), |
2643 | [b'/' , ..] => root.join(&url[1..]), |
2644 | // Path-relative URL |
2645 | _ => root.join(path).and_then(|r| r.join(url)), |
2646 | }).ok().and_then(|x| StrTendril::from_str(x.as_str()).ok()) |
2647 | } |
2648 | UrlRelative::Custom(ref evaluate) => { |
2649 | evaluate |
2650 | .evaluate(&*url) |
2651 | .as_ref() |
2652 | .map(Cow::as_ref) |
2653 | .map(StrTendril::from_str) |
2654 | .and_then(Result::ok) |
2655 | } |
2656 | UrlRelative::PassThrough => StrTendril::from_str(url).ok(), |
2657 | UrlRelative::Deny => None, |
2658 | } |
2659 | } |
2660 | } |
2661 | |
2662 | impl fmt::Debug for UrlRelative { |
2663 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2664 | match *self { |
2665 | UrlRelative::Deny => write!(f, "UrlRelative::Deny" ), |
2666 | UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough" ), |
2667 | UrlRelative::RewriteWithBase(ref base: &Url) => { |
2668 | write!(f, "UrlRelative::RewriteWithBase( {})" , base) |
2669 | } |
2670 | UrlRelative::RewriteWithRoot { ref root: &Url, ref path: &String } => { |
2671 | write!(f, "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}" ) |
2672 | } |
2673 | UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom" ), |
2674 | } |
2675 | } |
2676 | } |
2677 | |
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// Any function or closure of the shape `Fn(&str) -> Option<Cow<str>>` that is also
/// `Send + Sync` implements this trait automatically.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>>;
}

/// Blanket implementation: calling `evaluate` simply calls the function itself.
impl<F> UrlRelativeEvaluate for F
where
    F: for<'u> Fn(&'u str) -> Option<Cow<'u, str>> + Send + Sync,
{
    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
        (self)(url)
    }
}
2697 | |
2698 | impl fmt::Debug for dyn AttributeFilter { |
2699 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2700 | f.write_str(data:"AttributeFilter" ) |
2701 | } |
2702 | } |
2703 | |
/// Hook for dropping or rewriting individual attribute values.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Called with the element name, the attribute name and the attribute's current value.
    ///
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str)
        -> Option<Cow<'a, str>>;
}
2713 | |
2714 | impl<T> AttributeFilter for T |
2715 | where |
2716 | T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static, |
2717 | { |
2718 | fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> { |
2719 | self(element, attribute, value) |
2720 | } |
2721 | } |
2722 | |
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
/// ```
/// use ammonia::Builder;
///
/// let input = "<!-- comments will be stripped -->This is an Ammonia example.";
/// let output = "This is an Ammonia example.";
///
/// let document = Builder::new()
///     .clean(input);
/// assert_eq!(document.to_string(), output);
/// ```
pub struct Document(RcDom);
2748 | |
2749 | impl Document { |
2750 | /// Serializes a `Document` instance to a `String`. |
2751 | /// |
2752 | /// This method returns a [`String`] with the sanitized HTML. This is the simplest way to use |
2753 | /// `ammonia`. |
2754 | /// |
2755 | /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html |
2756 | /// |
2757 | /// # Examples |
2758 | /// |
2759 | /// use ammonia::Builder; |
2760 | /// |
2761 | /// let input = "Some <style></style>HTML here"; |
2762 | /// let output = "Some HTML here"; |
2763 | /// |
2764 | /// let document = Builder::new() |
2765 | /// .clean(input); |
2766 | /// assert_eq!(document.to_string(), output); |
2767 | pub fn to_string(&self) -> String { |
2768 | let opts = Self::serialize_opts(); |
2769 | let mut ret_val = Vec::new(); |
2770 | let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
2771 | serialize(&mut ret_val, &inner, opts) |
2772 | .expect("Writing to a string shouldn't fail (expect on OOM)" ); |
2773 | String::from_utf8(ret_val).expect("html5ever only supports UTF8" ) |
2774 | } |
2775 | |
2776 | /// Serializes a `Document` instance to a writer. |
2777 | /// |
2778 | /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step. |
2779 | /// |
2780 | /// To avoid consuming the writer, a mutable reference can be passed, like in the example below. |
2781 | /// |
2782 | /// Note that the in-memory representation of `Document` is larger than the serialized |
2783 | /// `String`. |
2784 | /// |
2785 | /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html |
2786 | /// |
2787 | /// # Examples |
2788 | /// |
2789 | /// use ammonia::Builder; |
2790 | /// |
2791 | /// let input = "Some <style></style>HTML here"; |
2792 | /// let expected = b"Some HTML here"; |
2793 | /// |
2794 | /// let document = Builder::new() |
2795 | /// .clean(input); |
2796 | /// |
2797 | /// let mut sanitized = Vec::new(); |
2798 | /// document.write_to(&mut sanitized) |
2799 | /// .expect("Writing to a string should not fail (except on OOM)"); |
2800 | /// assert_eq!(sanitized, expected); |
2801 | pub fn write_to<W>(&self, writer: W) -> io::Result<()> |
2802 | where |
2803 | W: io::Write, |
2804 | { |
2805 | let opts = Self::serialize_opts(); |
2806 | let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
2807 | serialize(writer, &inner, opts) |
2808 | } |
2809 | |
2810 | /// Exposes the `Document` instance as an [`rcdom::Handle`]. |
2811 | /// |
2812 | /// This method returns the inner object backing the `Document` instance. This allows |
2813 | /// making further changes to the DOM without introducing redundant serialization and |
2814 | /// parsing. |
2815 | /// |
2816 | /// Note that this method should be considered unstable and sits outside of the semver |
2817 | /// stability guarantees. It may change, break, or go away at any time, either because |
2818 | /// of `html5ever` changes or `ammonia` implementation changes. |
2819 | /// |
2820 | /// For this method to be accessible, a `cfg` flag is required. The easiest way is to |
2821 | /// use the `RUSTFLAGS` environment variable: |
2822 | /// |
2823 | /// ```text |
2824 | /// RUSTFLAGS='--cfg ammonia_unstable' cargo build |
2825 | /// ``` |
2826 | /// |
2827 | /// on Unix-like platforms, or |
2828 | /// |
2829 | /// ```text |
2830 | /// set RUSTFLAGS=--cfg ammonia_unstable |
2831 | /// cargo build |
2832 | /// ``` |
2833 | /// |
2834 | /// on Windows. |
2835 | /// |
2836 | /// This requirement also applies to crates that transitively depend on crates that use |
2837 | /// this flag. |
2838 | /// |
2839 | /// # Examples |
2840 | /// |
2841 | /// use ammonia::Builder; |
2842 | /// use maplit::hashset; |
2843 | /// use html5ever::serialize::{serialize, SerializeOpts}; |
2844 | /// |
2845 | /// # use std::error::Error; |
2846 | /// # fn do_main() -> Result<(), Box<Error>> { |
2847 | /// let input = "<a>one link</a> and <a>one more</a>"; |
2848 | /// let expected = "<a>one more</a> and <a>one link</a>"; |
2849 | /// |
2850 | /// let document = Builder::new() |
2851 | /// .link_rel(None) |
2852 | /// .clean(input); |
2853 | /// |
2854 | /// let mut node = document.to_dom_node(); |
2855 | /// node.children.borrow_mut().reverse(); |
2856 | /// |
2857 | /// let mut buf = Vec::new(); |
2858 | /// serialize(&mut buf, &node, SerializeOpts::default())?; |
2859 | /// let output = String::from_utf8(buf)?; |
2860 | /// |
2861 | /// assert_eq!(output, expected); |
2862 | /// # Ok(()) |
2863 | /// # } |
2864 | /// # fn main() { do_main().unwrap() } |
2865 | #[cfg (ammonia_unstable)] |
2866 | pub fn to_dom_node(&self) -> Handle { |
2867 | self.0.document.children.borrow()[0].clone() |
2868 | } |
2869 | |
2870 | fn serialize_opts() -> SerializeOpts { |
2871 | SerializeOpts::default() |
2872 | } |
2873 | } |
2874 | |
2875 | impl Clone for Document { |
2876 | fn clone(&self) -> Self { |
2877 | let parser: Parser = Builder::make_parser(); |
2878 | let dom: RcDom = parser.one(&self.to_string()[..]); |
2879 | Document(dom) |
2880 | } |
2881 | } |
2882 | |
2883 | impl fmt::Display for Document { |
2884 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2885 | write!(f, " {}" , self.to_string()) |
2886 | } |
2887 | } |
2888 | |
2889 | impl fmt::Debug for Document { |
2890 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2891 | write!(f, "Document( {})" , self.to_string()) |
2892 | } |
2893 | } |
2894 | |
2895 | impl From<Document> for String { |
2896 | fn from(document: Document) -> Self { |
2897 | document.to_string() |
2898 | } |
2899 | } |
2900 | |
2901 | #[cfg (test)] |
2902 | mod test { |
2903 | use super::*; |
/// Smoke test: cleaning 60,000 unclosed `<b>` tags must finish without panicking.
#[test]
fn deeply_nested_whitelisted() {
    let input = "<b>".repeat(60_000);
    clean(&input);
}
/// Smoke test: cleaning 60,000 unclosed `<b-b>` tags must finish without panicking.
#[test]
fn deeply_nested_blacklisted() {
    let input = "<b-b>".repeat(60_000);
    clean(&input);
}
/// Smoke test: deep nesting that alternates between a `<b-b>` and a `<b>` element at every
/// level must be cleaned without panicking.
#[test]
fn deeply_nested_alternating() {
    // Two tags per repetition, so 35,000 repetitions nest 70,000 elements deep.
    clean(&"<b-b><b>".repeat(35_000));
}
/// A bare `<` in text is not a tag; it must come back escaped as `&lt;`.
#[test]
fn included_angles() {
    let cleaned = clean("1 < 2");
    assert_eq!(cleaned, "1 &lt; 2");
}
/// A `<script>` element is removed together with its content.
#[test]
fn remove_script() {
    let cleaned = clean("an <script>evil()</script> example");
    // The spaces on both sides of the removed element survive, hence two in a row.
    assert_eq!(cleaned, "an  example");
}
/// A plain `http` link is kept and gains `rel="noopener noreferrer"`.
#[test]
fn ignore_link() {
    let input = r#"a <a href="http://www.google.com">good</a> example"#;
    let expected =
        r#"a <a href="http://www.google.com" rel="noopener noreferrer">good</a> example"#;
    assert_eq!(clean(input), expected);
}
/// Event-handler attributes such as `onclick` are stripped while the link itself is kept.
#[test]
fn remove_unsafe_link() {
    let input = r#"an <a onclick="evil()" href="http://www.google.com">evil</a> example"#;
    let expected =
        r#"an <a href="http://www.google.com" rel="noopener noreferrer">evil</a> example"#;
    assert_eq!(clean(input), expected);
}
/// A `javascript:` URL is removed from `href`; the anchor element itself remains.
#[test]
fn remove_js_link() {
    let input = r#"an <a href="javascript:evil()">evil</a> example"#;
    let expected = r#"an <a rel="noopener noreferrer">evil</a> example"#;
    assert_eq!(clean(input), expected);
}
/// An unclosed `<b>` comes back with its closing tag added.
#[test]
fn tag_rebalance() {
    let cleaned = clean("<b>AWESOME!");
    assert_eq!(cleaned, "<b>AWESOME!</b>");
}
/// With `UrlRelative::PassThrough`, a relative `href` is kept unchanged.
#[test]
fn allow_url_relative() {
    let cleaned = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .clean("<a href=test>Test</a>")
        .to_string();
    assert_eq!(cleaned, r#"<a href="test" rel="noopener noreferrer">Test</a>"#);
}
/// With `UrlRelative::RewriteWithBase`, a relative `href` is resolved against the base URL.
#[test]
fn rewrite_url_relative() {
    let base = Url::parse("http://example.com/").unwrap();
    let cleaned = Builder::new()
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean("<a href=test>Test</a>")
        .to_string();
    assert_eq!(
        cleaned,
        r#"<a href="http://example.com/test" rel="noopener noreferrer">Test</a>"#
    );
}
/// Regression test: an `href` that cannot be resolved against the base URL is dropped instead of
/// crashing the sanitizer.
#[test]
fn rewrite_url_relative_with_invalid_url() {
    // Reduced from https://github.com/Bauke/ammonia-crash-test
    let input = r##"<a href="\\"https://example.com\\"">test</a>"##;
    let base = Url::parse("http://example.com/").unwrap();
    let cleaned = Builder::new()
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean(input)
        .to_string();
    assert_eq!(cleaned, r##"<a rel="noopener noreferrer">test</a>"##);
}
/// A filter that returns every value unchanged leaves the output as it would be without a
/// filter; it is called for both the original `href` and the injected `rel`.
#[test]
fn attribute_filter_nop() {
    let cleaned = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("a", elem);
            assert!(
                matches!(
                    (attr, value),
                    ("href", "test") | ("rel", "noopener noreferrer")
                ),
                "{}",
                value
            );
            Some(value.into())
        })
        .clean("<a href=test>Test</a>")
        .to_string();
    assert_eq!(cleaned, r#"<a href="test" rel="noopener noreferrer">Test</a>"#);
}
3019 | |
/// Returning `None` from the filter removes that attribute; the others are kept.
#[test]
fn attribute_filter_drop() {
    let input = "Test<img alt=test src=imgtest>";
    let cleaned = Builder::new()
        .attribute_filter(|element, name, value| {
            assert_eq!("img", element);
            match (name, value) {
                ("alt", "test") => Some(value.into()),
                ("src", "imgtest") => None,
                _ => panic!("unexpected"),
            }
        })
        .clean(input)
        .to_string();
    assert_eq!(cleaned, r#"Test<img alt="test">"#);
}
3036 | |
/// When the filter rewrites `src` to an absolute URL, the relative-URL policy leaves it alone.
#[test]
fn url_filter_absolute() {
    let cleaned = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("src", "imgtest") => {
                    Some(format!("https://example.com/images/{value}").into())
                }
                ("alt", "test") => None,
                _ => panic!("unexpected"),
            }
        })
        // Must not be applied: the filtered value is already absolute.
        .url_relative(UrlRelative::RewriteWithBase(
            Url::parse("http://wrong.invalid/").unwrap(),
        ))
        .clean("Test<img alt=test src=imgtest>")
        .to_string();
    assert_eq!(
        cleaned,
        r#"Test<img src="https://example.com/images/imgtest">"#
    );
}
3061 | |
/// When the filter rewrites `src` to another relative URL, that value is still resolved
/// against the configured base.
#[test]
fn url_filter_relative() {
    let input = "Test<img alt=test src=imgtest>";
    let base = Url::parse("https://example.com/base/#").unwrap();
    let cleaned = Builder::new()
        .attribute_filter(|element, name, value| {
            assert_eq!("img", element);
            match (name, value) {
                ("src", "imgtest") => Some("rewrite".into()),
                ("alt", "test") => Some("altalt".into()),
                _ => panic!("unexpected"),
            }
        })
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean(input)
        .to_string();
    assert_eq!(
        cleaned,
        r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
    );
}
3084 | |
/// With `link_rel(None)`, the rewritten link carries no `rel` attribute.
#[test]
fn rewrite_url_relative_no_rel() {
    let base = Url::parse("http://example.com/").unwrap();
    let cleaned = Builder::new()
        .url_relative(UrlRelative::RewriteWithBase(base))
        .link_rel(None)
        .clean("<a href=test>Test</a>")
        .to_string();
    assert_eq!(cleaned, r#"<a href="http://example.com/test">Test</a>"#);
}
/// With `UrlRelative::Deny`, a relative `href` is removed.
#[test]
fn deny_url_relative() {
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Deny)
        .clean("<a href=test>Test</a>")
        .to_string();
    assert_eq!(cleaned, r#"<a rel="noopener noreferrer">Test</a>"#);
}
/// A user-supplied `rel` value is replaced by the configured one.
#[test]
fn replace_rel() {
    let cleaned = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .clean(r#"<a href=test rel="garbage">Test</a>"#)
        .to_string();
    assert_eq!(cleaned, r#"<a href="test" rel="noopener noreferrer">Test</a>"#);
}
/// Turning off `rel` injection does not allow `rel`: the user-supplied value is still removed.
#[test]
fn consider_rel_still_banned() {
    let cleaned = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .link_rel(None)
        .clean(r#"<a href=test rel="garbage">Test</a>"#)
        .to_string();
    assert_eq!(cleaned, r#"<a href="test">Test</a>"#);
}
/// `data` is checked as a URL on `<object>` (the `javascript:` value is dropped) but is left
/// untouched on `<span>`.
#[test]
fn object_data() {
    let input = concat!(
        r#"<span data="javascript:evil()">Test</span>"#,
        r#"<object data="javascript:evil()"></object>M"#,
    );
    let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#;
    let cleaned = Builder::new()
        .tags(hashset!["span", "object"])
        .generic_attributes(hashset!["data"])
        .clean(input)
        .to_string();
    assert_eq!(cleaned, expected);
}
/// An attribute that is not allowed (`border`) is removed; the output also contains the
/// `<tbody>` wrapper that was absent from the input.
#[test]
fn remove_attributes() {
    let input = "<table border= \"1 \"><tr></tr></table>";
    let document = Builder::new().clean(input);
    assert_eq!(
        document.to_string(),
        "<table><tbody><tr></tr></tbody></table>"
    );
}
/// A double quote inside an attribute value is serialized as `&quot;`.
#[test]
fn quotes_in_attrs() {
    let cleaned = clean(r#"<b title='"'>contents</b>"#);
    assert_eq!(cleaned, r#"<b title="&quot;">contents</b>"#);
}
/// Allowing `rel` as a generic attribute while `link_rel` is set must panic.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_generic() {
    let generic = hashset!["rel"];
    Builder::new()
        .link_rel(Some("noopener noreferrer"))
        .generic_attributes(generic)
        .clean("something");
}
/// Allowing `rel` on `<a>` while `link_rel` is set must panic.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_a() {
    let per_tag = hashmap![
        "a" => hashset!["rel"],
    ];
    Builder::new()
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(per_tag)
        .clean("something");
}
/// Allowing `rel` on `<span>` does not panic even though `link_rel` is set.
#[test]
fn no_panic_if_rel_is_allowed_and_replaced_span() {
    let per_tag = hashmap![
        "span" => hashset!["rel"],
    ];
    Builder::new()
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(per_tag)
        .clean("<span rel= \"what \">s</span>");
}
/// With `link_rel(None)`, allowing `rel` as a generic attribute does not panic.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
    let generic = hashset!["rel"];
    Builder::new()
        .link_rel(None)
        .generic_attributes(generic)
        .clean("<a rel= \"what \">s</a>");
}
/// With `link_rel(None)`, allowing `rel` on `<a>` does not panic.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
    let per_tag = hashmap![
        "a" => hashset!["rel"],
    ];
    Builder::new()
        .link_rel(None)
        .tag_attributes(per_tag)
        .clean("<a rel= \"what \">s</a>");
}
/// A void element such as `<br>` is emitted without a closing tag.
#[test]
fn dont_close_void_elements() {
    let cleaned = clean("<br>");
    assert_eq!(cleaned.to_string(), "<br>");
}
/// Combining `allowed_classes` with `class` listed in `tag_attributes` must panic.
#[should_panic]
#[test]
fn panic_on_allowed_classes_tag_attributes() {
    let input = "<p class= \"foo bar \"><a class= \"baz bleh \">Hey</a></p>";
    let per_tag = hashmap![
        "p" => hashset!["class"],
        "a" => hashset!["class"],
    ];
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    Builder::new()
        .link_rel(None)
        .tag_attributes(per_tag)
        .allowed_classes(classes)
        .clean(input);
}
/// Combining `allowed_classes` with `class` listed in `generic_attributes` must panic.
#[should_panic]
#[test]
fn panic_on_allowed_classes_generic_attributes() {
    let input = "<p class= \"foo bar \"><a class= \"baz bleh \">Hey</a></p>";
    let generic = hashset!["class", "href", "some-foo"];
    let classes = hashmap![
        "p" => hashset!["foo", "bar"],
        "a" => hashset!["baz"],
    ];
    Builder::new()
        .link_rel(None)
        .generic_attributes(generic)
        .allowed_classes(classes)
        .clean(input);
}
/// Only the class names listed in `allowed_classes` for a tag are kept.
#[test]
fn remove_non_allowed_classes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let document = Builder::new()
        .link_rel(None)
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ])
        .clean(input);
    assert_eq!(
        document.to_string(),
        r#"<p class="foo bar"><a class="baz">Hey</a></p>"#
    );
}
/// Class filtering for `<p>` and `<a>` still works when `class` is listed in `tag_attributes`
/// for a different tag (`<div>`).
#[test]
fn remove_non_allowed_classes_with_tag_class() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let document = Builder::new()
        .link_rel(None)
        .tag_attributes(hashmap![
            "div" => hashset!["class"],
        ])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ])
        .clean(input);
    assert_eq!(
        document.to_string(),
        r#"<p class="foo bar"><a class="baz">Hey</a></p>"#
    );
}
/// Class names are split on ASCII whitespace only.
#[test]
fn allowed_classes_ascii_whitespace() {
    // According to https://infra.spec.whatwg.org/#ascii-whitespace,
    // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are
    // considered to be ASCII whitespace. Unicode whitespace characters
    // and VT (\x0B) aren't ASCII whitespace.
    //
    // `f` is directly followed by VT and `g` by U+2000, so those two tokens are
    // "f\x0B" and "g\u{2000}" and match nothing in the allow-list.
    let input = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
    let document = Builder::new()
        .allowed_classes(hashmap![
            "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
        ])
        .clean(input);
    assert_eq!(document.to_string(), r#"<p class="a b c d e"></p>"#);
}
/// An attribute restricted by `tag_attribute_values` is removed when its value is not in the
/// allowed set.
#[test]
fn remove_non_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="baz" name="foo"></p>"#;
    let document = Builder::new()
        .tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ])
        .tag_attributes(hashmap![
            "p" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(document.to_string(), r#"<p name="foo"></p>"#);
}
/// An attribute restricted by `tag_attribute_values` is kept when its value is in the allowed
/// set.
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="bar" name="foo"></p>"#;
    let document = Builder::new()
        .tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ])
        .tag_attributes(hashmap![
            "p" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(document.to_string(), input);
}
/// Allowed attribute values match case-insensitively, and the original casing is preserved in
/// the output.
#[test]
fn tag_attribute_values_case_insensitive() {
    let input = r#"<input type="CHECKBOX" name="foo">"#;
    let document = Builder::new()
        .tags(hashset!["input"])
        .tag_attribute_values(hashmap![
            "input" => hashmap![
                "type" => hashset!["checkbox"],
            ],
        ])
        .tag_attributes(hashmap![
            "input" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(document.to_string(), input);
}
/// `set_tag_attribute_value` adds the attribute when the element does not have it.
#[test]
fn set_tag_attribute_values() {
    let document = Builder::new()
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank")
        .clean(r#"<a href="https://example.com/">Link</a>"#);
    assert_eq!(
        document.to_string(),
        r#"<a href="https://example.com/" target="_blank">Link</a>"#
    );
}
/// `set_tag_attribute_value` overwrites an existing value in place, keeping attribute order.
#[test]
fn update_existing_set_tag_attribute_values() {
    let document = Builder::new()
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank")
        .clean(r#"<a target="bad" href="https://example.com/">Link</a>"#);
    assert_eq!(
        document.to_string(),
        r#"<a target="_blank" href="https://example.com/">Link</a>"#
    );
}
/// Setting an attribute value for a tag that is not allowed does not make the tag appear in
/// the output.
#[test]
fn unwhitelisted_set_tag_attribute_values() {
    let input = "<span>hi</span><my-elem>";
    let document = Builder::new()
        .set_tag_attribute_value("my-elem", "my-attr", "val")
        .clean(input);
    assert_eq!(document.to_string(), "<span>hi</span>");
}
/// A `javascript:` URL written entirely as character references is still recognized and
/// removed.
#[test]
fn remove_entity_link() {
    // The href is `javascript:alert('XSS')`, spelled as hexadecimal character references.
    let input = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
                 &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
    let cleaned = clean(input);
    assert_eq!(cleaned, r#"<a rel="noopener noreferrer">Click me!</a>"#);
}
/// A custom evaluator can rewrite, drop or pass through relative URLs; absolute URLs never
/// reach it.
#[test]
fn remove_relative_url_evaluate() {
    fn is_absolute_path(url: &str) -> bool {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
        // `/a/b/c` is an absolute path, and what we want to do stuff to.
        url.starts_with('/') && !url.starts_with("//")
    }
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            Some(Cow::Owned(format!("/root{url}")))
        } else if is_banned(url) {
            None
        } else {
            Some(Cow::Borrowed(url))
        }
    }
    let input = "<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>";
    let expected = concat!(
        r#"<a rel="noopener noreferrer">banned</a>"#,
        r#"<a href="/root/test/path" rel="noopener noreferrer">fixed</a>"#,
        r#"<a href="path" rel="noopener noreferrer">passed</a>"#,
        r#"<a href="http://google.com/" rel="noopener noreferrer">skipped</a>"#,
    );
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    assert_eq!(cleaned, expected);
}
/// Dropping an `href` through a custom evaluator leaves the other attributes in place, whatever
/// their position relative to the removed one.
#[test]
fn remove_relative_url_evaluate_b() {
    fn is_absolute_path(url: &str) -> bool {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
        // `/a/b/c` is an absolute path, and what we want to do stuff to.
        url.starts_with('/') && !url.starts_with("//")
    }
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            Some(Cow::Owned(format!("/root{url}")))
        } else if is_banned(url) {
            None
        } else {
            Some(Cow::Borrowed(url))
        }
    }
    let input = "<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>";
    let expected = concat!(
        r#"<a rel="noopener noreferrer">banned</a>"#,
        r#"<a rel="noopener noreferrer" title="test">banned</a>"#,
        r#"<a title="test" rel="noopener noreferrer">banned</a>"#,
    );
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    assert_eq!(cleaned, expected);
}
/// The custom evaluator is not consulted for absolute URLs.
#[test]
fn remove_relative_url_evaluate_c() {
    // Don't run on absolute URLs.
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned(String::from("invalid")))
    }
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(r#"<a href="https://www.google.com/">google</a>"#)
        .to_string();
    assert_eq!(
        cleaned,
        r#"<a href="https://www.google.com/" rel="noopener noreferrer">google</a>"#
    );
}
/// Disallowed elements are unwrapped at every level while their text content is kept.
#[test]
fn clean_children_of_bad_element() {
    let document = Builder::new().clean("<bad><evil>a</evil>b</bad>");
    assert_eq!(document.to_string(), "ab");
}
/// `clean_from_reader` accepts a byte reader and removes the `<script>` element with its content.
#[test]
fn reader_input() {
    let input = b"an <script>evil()</script> example";
    let outcome = Builder::new().clean_from_reader(&input[..]);
    assert!(outcome.is_ok());
    // The spaces on both sides of the removed element survive, hence two in a row.
    assert_eq!(outcome.unwrap().to_string(), "an  example");
}
/// Bytes that are not valid UTF-8 are replaced with U+FFFD instead of producing an error.
#[test]
fn reader_non_utf8() {
    let input = b"non-utf8 \xF0\x90\x80string";
    let outcome = Builder::new().clean_from_reader(&input[..]);
    assert!(outcome.is_ok());
    let document = outcome.unwrap();
    assert_eq!(document.to_string(), "non-utf8 \u{fffd}string");
}
/// `Display` for `Document` prints the sanitized HTML and nothing else.
#[test]
fn display_impl() {
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    assert_eq!(format!("{}", document), "a <a>link</a>");
}
/// `Debug` for `Document` wraps the sanitized HTML as `Document(...)`.
#[test]
fn debug_impl() {
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    assert_eq!(format!("{:?}", document), "Document(a <a>link</a>)");
}
/// Smoke test for the unstable DOM accessor; only built with `--cfg ammonia_unstable`.
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    let _node = document.to_dom_node();
}
/// `String::from(Document)` yields the serialized HTML, with the unclosed `<a>` closed.
#[test]
fn string_from_document() {
    let html = String::from(Builder::new().link_rel(None).clean(r#"a <a>link"#));
    assert_eq!(html, "a <a>link</a>");
}
/// Compile-time check helper: only accepts values whose type is `Sync`.
fn require_sync<T>(_value: T)
where
    T: Sync,
{
}
/// Compile-time check helper: only accepts values whose type is `Send`.
fn require_send<T>(_value: T)
where
    T: Send,
{
}
/// `Builder` must be both `Sync` and `Send`; this fails to compile otherwise.
#[test]
fn require_sync_and_send() {
    let for_sync = Builder::new();
    let for_send = Builder::new();
    require_sync(for_sync);
    require_send(for_send);
}
/// `id_prefix` is prepended to `id` values where `id` is allowed; on `<b>`, where it is not
/// allowed, the attribute is removed.
#[test]
fn id_prefixed() {
    // The stray `</a>` after `<b>` is part of the input; the output closes `<b>` properly.
    let input = r#"<a id="hello"></a><b id="hello"></a>"#;
    let html = String::from(
        Builder::new()
            .tag_attributes(hashmap![
                "a" => hashset!["id"],
            ])
            .id_prefix(Some("prefix-"))
            .clean(input),
    );
    assert_eq!(
        html,
        r#"<a id="prefix-hello" rel="noopener noreferrer"></a><b></b>"#
    );
}
/// An `id` that already starts with the configured prefix is not prefixed a second time.
#[test]
fn id_already_prefixed() {
    let input = r#"<a id="prefix-hello"></a>"#;
    let html = String::from(
        Builder::new()
            .tag_attributes(hashmap![
                "a" => hashset!["id"],
            ])
            .id_prefix(Some("prefix-"))
            .clean(input),
    );
    assert_eq!(html, r#"<a id="prefix-hello" rel="noopener noreferrer"></a>"#);
}
/// A tag listed in `clean_content_tags` is removed together with everything inside it.
#[test]
fn clean_content_tags() {
    let input = "<script type= \"text/javascript \"><a>Hello!</a></script>";
    let document = Builder::new()
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let html = String::from(document);
    assert_eq!(html.to_string(), "");
}
/// Only the listed tag loses its content; the surrounding allowed markup is untouched.
#[test]
fn only_clean_content_tags() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let document = Builder::new()
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let html = String::from(document);
    assert_eq!(html.to_string(), "<em>This is</em><p>still here!</p>");
}
/// Removing `a` and its attributes from the configuration still cleans successfully.
#[test]
fn clean_removed_default_tag() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let document = Builder::new()
        .rm_tags(hashset!["a"])
        .rm_tag_attributes("a", hashset!["href", "hreflang"])
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let html = String::from(document);
    assert_eq!(html.to_string(), "<em>This is</em><p>still here!</p>");
}
/// Listing `a` in `clean_content_tags` after removing it from the allowed tags must panic.
#[test]
#[should_panic]
fn panic_on_clean_content_tag_attribute() {
    let removed = std::iter::once("a");
    let content_tags = hashset!["a"];
    Builder::new()
        .rm_tags(removed)
        .clean_content_tags(content_tags)
        .clean("");
}
/// Listing `a` in `clean_content_tags` on an otherwise default builder must panic.
#[test]
#[should_panic]
fn panic_on_clean_content_tag() {
    let content_tags = hashset!["a"];
    Builder::new().clean_content_tags(content_tags).clean("");
}
3571 | |
/// `clean_text` must turn markup-significant characters into character
/// references: `<` becomes `&lt;`, `>` becomes `&gt;` and a space becomes
/// `&#32;`. The expected value is therefore the escaped form, never the raw
/// input echoed back.
#[test]
fn clean_text_test() {
    assert_eq!(
        clean_text("<this> is <a test function"),
        "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
    );
}
3579 | |
/// Every whitespace character handled by `clean_text` is emitted as a numeric
/// character reference: space is `&#32;`, tab is `&#9;`, line feed is `&#10;`
/// and form feed is `&#12;`. The expected literal spells the references out so
/// that invisible characters cannot hide a mismatch.
#[test]
fn clean_text_spaces_test() {
    assert_eq!(
        clean_text(" \x09\x0a\x0c\x20"),
        "&#32;&#9;&#10;&#12;&#32;"
    );
}
3584 | |
/// Namespace-confusion regression tests for content nested inside `<svg>`.
#[test]
fn ns_svg() {
    // https://github.com/cure53/DOMPurify/pull/495
    // The markup smuggled through the `title` attribute must not survive:
    // only the trailing text is kept.
    let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
    let result = Builder::new().add_tags(&["iframe"]).clean(fragment).to_string();
    assert_eq!(result, "test");

    // `iframe` is allowed, but the one nested in the (disallowed) `<svg>` is
    // unwrapped to its text while the plain HTML one is kept.
    let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
    let result = Builder::new().add_tags(&["iframe"]).clean(fragment).to_string();
    assert_eq!(result, "remove me<iframe>keep me</iframe>");

    // An `<a>` nested in the disallowed `<svg>` is unwrapped the same way.
    let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
    let result = Builder::new().add_tags(&["iframe"]).clean(fragment).to_string();
    assert_eq!(result, "remove me<iframe>keep me</iframe>");

    // Once `svg` itself is allowed, the nested `<a>` is kept and receives the
    // `rel` attribute. The serializer writes `name="value"` with no padding,
    // so the expected string must not contain spaces around the quotes.
    let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
    let result = Builder::new()
        .add_tags(&["iframe", "svg"])
        .clean(fragment)
        .to_string();
    assert_eq!(
        result,
        "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>"
    );
}
3607 | |
/// Checks how `<mglyph>` is treated in three contexts while `math`, `mtext`
/// and `mglyph` are all allowed: on its own, below a `<div>` inside
/// `<math><mtext>`, and directly inside `<math><mtext>`.
#[test]
fn ns_mathml() {
    // https://github.com/cure53/DOMPurify/pull/495
    // Each entry is (input fragment, expected sanitized output).
    let cases = [
        ("<mglyph></mglyph>", ""),
        (
            "<math><mtext><div><mglyph>",
            "<math><mtext><div></div></mtext></math>",
        ),
        (
            "<math><mtext><mglyph>",
            "<math><mtext><mglyph></mglyph></mtext></math>",
        ),
    ];
    for (fragment, expected) in cases {
        let cleaned = Builder::new()
            .add_tags(&["math", "mtext", "mglyph"])
            .clean(fragment)
            .to_string();
        assert_eq!(cleaned, expected);
    }
}
3639 | |
/// Walks the generic-attribute-prefix setters through a full cycle: unset,
/// replaced wholesale, extended (a duplicate is ignored), shrunk (removing a
/// missing entry is a no-op) and finally back to unset once the last prefix
/// is removed.
#[test]
fn generic_attribute_prefixes() {
    let data_prefix = ["data-"];
    let code_prefix = ["code-"];
    let mut builder = Builder::new();

    // Nothing is configured on a fresh builder.
    assert!(builder.generic_attribute_prefixes.is_none());

    // Replacing the whole set installs exactly one prefix.
    builder.generic_attribute_prefixes(hashset!["data-"]);
    assert!(builder.generic_attribute_prefixes.is_some());
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    // Adding a prefix that is already present leaves the size unchanged.
    builder.add_generic_attribute_prefixes(&data_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    // Adding a new prefix grows the set.
    builder.add_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(2)
    );

    // Removing it shrinks the set again; a second removal changes nothing.
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(
        builder.generic_attribute_prefixes.as_ref().map(HashSet::len),
        Some(1)
    );

    // Removing the last prefix resets the option to `None`.
    builder.rm_generic_attribute_prefixes(&data_prefix);
    assert!(builder.generic_attribute_prefixes.is_none());
}
3662 | |
/// Cleans the same fragment under three configurations to show that generic
/// attribute prefixes let through attributes that are not listed explicitly.
#[test]
fn generic_attribute_prefixes_clean() {
    let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

    // No prefixes configured: only the explicitly listed `data-1` survives.
    let explicit_only = Builder::new()
        .add_tag_attributes("a", &["data-1"])
        .clean(fragment)
        .to_string();
    assert_eq!(
        explicit_only,
        r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // With the `data-` prefix allowed, `data-2` is kept as well.
    let data_allowed = Builder::new()
        .add_tag_attributes("a", &["data-1"])
        .add_generic_attribute_prefixes(&["data-"])
        .clean(fragment)
        .to_string();
    assert_eq!(
        data_allowed,
        r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // With both prefixes allowed, every `data-*` and `code-*` attribute is kept.
    let data_and_code_allowed = Builder::new()
        .add_tag_attributes("a", &["data-1", "code-1"])
        .add_generic_attribute_prefixes(&["data-", "code-"])
        .clean(fragment)
        .to_string();
    assert_eq!(
        data_and_code_allowed,
        r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );
}
/// A `<` that is followed by a space must not be reported as HTML.
#[test]
fn lesser_than_isnt_html() {
    assert!(!is_html("1 < 2"));
}
/// A `<` squeezed between two digits must not be reported as HTML either.
#[test]
fn dense_lesser_than_isnt_html() {
    assert!(!is_html("1<2"));
}
/// `<2>` has the shape of a tag but starts with a digit, and must not be
/// reported as HTML.
#[test]
fn what_about_number_elements() {
    assert!(!is_html("foo<2>bar"));
}
/// A Rust turbofish contains `<u8>`, which is reported as HTML.
#[test]
fn turbofish_is_html_sadly() {
    assert!(is_html("Vec::<u8>::new()"));
}
/// A trailing `<g>` is reported as HTML.
#[test]
fn stop_grinning() {
    assert!(is_html("did you really believe me? <g>"));
}
/// A bare opening `<b>` tag is reported as HTML.
#[test]
fn dont_be_bold() {
    assert!(is_html("<b>"));
}
3726 | |
/// Table-driven check of `UrlRelative::RewriteWithRoot`.
///
/// Each row is `(root, path, url, result)`: a link whose `href` is `url` is
/// cleaned with the given `root`/`path` configuration and the rewritten
/// `href` must equal `result`.
#[test]
fn rewrite_with_root() {
    let tests = [
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/",
            "https://github.com/",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];
    for (root, path, url, result) in tests {
        // The URL is spliced in directly after the opening quote. Any padding
        // there would change the input (a leading `/` would no longer be the
        // first byte) and would never appear in the serialized output.
        let input = format!(r#"<a href="{url}">test</a>"#);
        let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let actual = Builder::new()
            .url_relative(UrlRelative::RewriteWithRoot {
                root: Url::parse(root).unwrap(),
                path: path.to_string(),
            })
            .clean(&input)
            .to_string();
        if expected != actual {
            // Print the failing row first so it can be located in the table.
            println!(
                "failed to check ( {root}, {path}, {url}, {result}) \n{r} != {a}",
                r = expected,
                a = actual
            );
            assert_eq!(expected, actual);
        }
    }
}
3822 | } |
3823 | |