1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Scanners for fragments of CommonMark syntax
22
23use std::char;
24
25use crate::parse::HtmlScanGuard;
26pub(crate) use crate::puncttable::{is_ascii_punctuation, is_punctuation};
27use crate::strings::CowStr;
28use crate::{entities, BlockQuoteKind, HeadingLevel};
29use crate::{Alignment, LinkType};
30
31use memchr::memchr;
32
// Tag names recognised by `is_html_tag`. All entries are lower case and the
// table is kept in ascending byte order, because the lookup is a binary
// search: insert new names at their sorted position.
const HTML_TAGS: [&str; 62] = [
    "address", "article", "aside",
    "base", "basefont", "blockquote", "body",
    "caption", "center", "col", "colgroup",
    "dd", "details", "dialog", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "iframe",
    "legend", "li", "link",
    "main", "menu", "menuitem",
    "nav", "noframes",
    "ol", "optgroup", "option",
    "p", "param",
    "search", "section", "summary",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
    "ul",
];
98
/// Cursor over the start of a single line: tracks indentation and the
/// container markers (block quotes, list items, ...) found there.
#[derive(Clone)]
pub(crate) struct LineStart<'a> {
    /// The bytes being scanned.
    bytes: &'a [u8],
    /// Index into `bytes` of the next byte to examine.
    ix: usize,

    /// Index into `bytes` just past the most recently scanned tab (zero
    /// before any tab has been scanned).
    ///
    /// No tab lies between this index and `ix`, and for block-structure
    /// purposes this index is treated as sitting on a tab stop.
    ///
    /// Only maintained while the leading part of the line is scanned;
    /// methods dealing with the interior of the line do not update it.
    tab_start: usize,

    /// Where spaces contribute to block structure, a tab counts as the
    /// spaces needed to reach the next tab stop (stops every 4 columns).
    ///
    /// When a tab has been scanned but only part of its width was used,
    /// this holds the number of its columns still logically pending in
    /// front of the byte at `ix`.
    spaces_remaining: usize,

    /// Lower bound for thematic-break detection: no thematic break can
    /// occur before this offset. Caching it avoids rescanning the same
    /// prefix again and again.
    min_hrule_offset: usize,
}
131
132impl<'a> LineStart<'a> {
133 pub(crate) fn new(bytes: &[u8]) -> LineStart<'_> {
134 LineStart {
135 bytes,
136 tab_start: 0,
137 ix: 0,
138 spaces_remaining: 0,
139 min_hrule_offset: 0,
140 }
141 }
142
143 /// Try to scan a number of spaces.
144 ///
145 /// Returns true if all spaces were consumed.
146 ///
147 /// Note: consumes some spaces even if not successful.
148 pub(crate) fn scan_space(&mut self, n_space: usize) -> bool {
149 self.scan_space_inner(n_space) == 0
150 }
151
152 /// Scan a number of spaces up to a maximum.
153 ///
154 /// Returns number of spaces scanned.
155 pub(crate) fn scan_space_upto(&mut self, n_space: usize) -> usize {
156 n_space - self.scan_space_inner(n_space)
157 }
158
159 /// Returns unused remainder of spaces.
160 fn scan_space_inner(&mut self, mut n_space: usize) -> usize {
161 // Consume any common prefix between the number of spaces we
162 // want and the number of unscanned tab-introduced spaces.
163 let n_from_remaining = self.spaces_remaining.min(n_space);
164 self.spaces_remaining -= n_from_remaining;
165 n_space -= n_from_remaining;
166
167 while n_space > 0 && self.ix < self.bytes.len() {
168 match self.bytes[self.ix] {
169 b' ' => {
170 self.ix += 1;
171 n_space -= 1;
172 }
173 b'\t' => {
174 let spaces = 4 - (self.ix - self.tab_start) % 4;
175 self.ix += 1;
176 self.tab_start = self.ix;
177 let n = spaces.min(n_space);
178 n_space -= n;
179
180 // Record the unscanned portion of the tab.
181 self.spaces_remaining = spaces - n;
182 }
183 _ => break,
184 }
185 }
186 n_space
187 }
188
189 /// Scan all available ASCII whitespace (not including eol).
190 pub(crate) fn scan_all_space(&mut self) {
191 self.spaces_remaining = 0;
192 self.ix += self.bytes[self.ix..]
193 .iter()
194 .take_while(|&&b| b == b' ' || b == b'\t')
195 .count();
196 }
197
198 /// Determine whether we're at end of line (includes end of file).
199 pub(crate) fn is_at_eol(&self) -> bool {
200 self.bytes
201 .get(self.ix)
202 .map(|&c| c == b'\r' || c == b'\n')
203 .unwrap_or(true)
204 }
205
206 fn scan_ch(&mut self, c: u8) -> bool {
207 if self.ix < self.bytes.len() && self.bytes[self.ix] == c {
208 self.ix += 1;
209 true
210 } else {
211 false
212 }
213 }
214
215 fn scan_case_insensitive(&mut self, tag: &[u8]) -> bool {
216 if self.bytes.len() - self.ix < tag.len() {
217 return false;
218 }
219 let prefix = &self.bytes[self.ix..self.ix + tag.len()];
220 let ok = prefix.eq_ignore_ascii_case(tag);
221 if ok {
222 self.ix += tag.len();
223 }
224 ok
225 }
226
227 pub(crate) fn scan_blockquote_tag(&mut self) -> Option<BlockQuoteKind> {
228 let saved_ix = self.ix;
229 let tag = if self.scan_ch(b'[') && self.scan_ch(b'!') {
230 let tag = if self.scan_case_insensitive(b"note") {
231 Some(BlockQuoteKind::Note)
232 } else if self.scan_case_insensitive(b"tip") {
233 Some(BlockQuoteKind::Tip)
234 } else if self.scan_case_insensitive(b"important") {
235 Some(BlockQuoteKind::Important)
236 } else if self.scan_case_insensitive(b"warning") {
237 Some(BlockQuoteKind::Warning)
238 } else if self.scan_case_insensitive(b"caution") {
239 Some(BlockQuoteKind::Caution)
240 } else {
241 None
242 };
243 if tag.is_some() && self.scan_ch(b']') {
244 if let Some(nl) = scan_blank_line(&self.bytes[self.ix..]) {
245 self.ix += nl;
246 tag
247 } else {
248 None
249 }
250 } else {
251 None
252 }
253 } else {
254 None
255 };
256 if tag.is_none() {
257 self.ix = saved_ix;
258 }
259 tag
260 }
261
262 pub(crate) fn scan_blockquote_marker(&mut self) -> bool {
263 let save = self.clone();
264 let _ = self.scan_space(3);
265 if self.scan_ch(b'>') {
266 let _ = self.scan_space(1);
267 true
268 } else {
269 *self = save;
270 false
271 }
272 }
273
274 /// Scan a list marker.
275 ///
276 /// Return value is the character, the start index, and the indent in spaces.
277 /// For ordered list markers, the character will be one of b'.' or b')'. For
278 /// bullet list markers, it will be one of b'-', b'+', or b'*'.
279 pub(crate) fn scan_list_marker(&mut self) -> Option<(u8, u64, usize)> {
280 let save = self.clone();
281 let indent = self.scan_space_upto(4);
282 if indent < 4 && self.ix < self.bytes.len() {
283 let c = self.bytes[self.ix];
284 if c == b'-' || c == b'+' || c == b'*' {
285 if self.ix >= self.min_hrule_offset {
286 // there could be an hrule here
287 if let Err(min_offset) = scan_hrule(&self.bytes[self.ix..]) {
288 self.min_hrule_offset = min_offset;
289 } else {
290 *self = save;
291 return None;
292 }
293 }
294 self.ix += 1;
295 if self.scan_space(1) || self.is_at_eol() {
296 return self.finish_list_marker(c, 0, indent + 2);
297 }
298 } else if c.is_ascii_digit() {
299 let start_ix = self.ix;
300 let mut ix = self.ix + 1;
301 let mut val = u64::from(c - b'0');
302 while ix < self.bytes.len() && ix - start_ix < 10 {
303 let c = self.bytes[ix];
304 ix += 1;
305 if c.is_ascii_digit() {
306 val = val * 10 + u64::from(c - b'0');
307 } else if c == b')' || c == b'.' {
308 self.ix = ix;
309 if self.scan_space(1) || self.is_at_eol() {
310 return self.finish_list_marker(c, val, indent + 1 + ix - start_ix);
311 } else {
312 break;
313 }
314 } else {
315 break;
316 }
317 }
318 }
319 }
320 *self = save;
321 None
322 }
323
324 fn finish_list_marker(
325 &mut self,
326 c: u8,
327 start: u64,
328 mut indent: usize,
329 ) -> Option<(u8, u64, usize)> {
330 let save = self.clone();
331
332 // skip the rest of the line if it's blank
333 if scan_blank_line(&self.bytes[self.ix..]).is_some() {
334 return Some((c, start, indent));
335 }
336
337 let post_indent = self.scan_space_upto(4);
338 if post_indent < 4 {
339 indent += post_indent;
340 } else {
341 *self = save;
342 }
343 Some((c, start, indent))
344 }
345
346 /// Returns Some(is_checked) when a task list marker was found. Resets itself
347 /// to original state otherwise.
348 pub(crate) fn scan_task_list_marker(&mut self) -> Option<bool> {
349 let save = self.clone();
350 self.scan_space_upto(3);
351
352 if !self.scan_ch(b'[') {
353 *self = save;
354 return None;
355 }
356 let is_checked = match self.bytes.get(self.ix) {
357 Some(&c) if is_ascii_whitespace_no_nl(c) => {
358 self.ix += 1;
359 false
360 }
361 Some(b'x') | Some(b'X') => {
362 self.ix += 1;
363 true
364 }
365 _ => {
366 *self = save;
367 return None;
368 }
369 };
370 if !self.scan_ch(b']') {
371 *self = save;
372 return None;
373 }
374 if !self
375 .bytes
376 .get(self.ix)
377 .map(|&b| is_ascii_whitespace_no_nl(b))
378 .unwrap_or(false)
379 {
380 *self = save;
381 return None;
382 }
383 Some(is_checked)
384 }
385
386 pub(crate) fn bytes_scanned(&self) -> usize {
387 self.ix
388 }
389
390 pub(crate) fn remaining_space(&self) -> usize {
391 self.spaces_remaining
392 }
393}
394
/// True for the ASCII whitespace bytes: tab, LF, VT, FF, CR and space.
pub(crate) fn is_ascii_whitespace(c: u8) -> bool {
    matches!(c, 0x09..=0x0d | b' ')
}
398
/// True for ASCII whitespace other than line endings: tab, VT, FF and space.
pub(crate) fn is_ascii_whitespace_no_nl(c: u8) -> bool {
    matches!(c, b'\t' | 0x0b | 0x0c | b' ')
}
402
/// True for the ASCII letters `a`-`z` and `A`-`Z`.
fn is_ascii_alpha(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z')
}
406
/// True for the ASCII digits and the ASCII letters of either case.
fn is_ascii_alphanumeric(c: u8) -> bool {
    c.is_ascii_alphanumeric()
}
410
/// True for `-`, the ASCII digits and the ASCII letters of either case.
fn is_ascii_letterdigitdash(c: u8) -> bool {
    matches!(c, b'-' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
414
/// True for the ASCII digits `0`-`9`.
fn is_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'9')
}
418
/// True unless `c` is one of the bytes that may not appear in an unquoted
/// attribute value: `'`, `"`, space, `=`, `>`, `<`, a backtick, LF or CR.
fn is_valid_unquoted_attr_value_char(c: u8) -> bool {
    const FORBIDDEN: &[u8] = b"'\" =><`\n\r";
    !FORBIDDEN.contains(&c)
}
425
// Scan one specific byte: yields 1 when `data` begins with `c`, otherwise 0
// (also for empty `data`).
pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
    usize::from(data.first() == Some(&c))
}
434
/// Counts the leading bytes of `data` for which `f` returns `true`.
///
/// Scanning stops at the first byte rejected by `f`; the result is
/// `data.len()` when every byte is accepted and 0 for empty input.
pub(crate) fn scan_while<F>(data: &[u8], mut f: F) -> usize
where
    F: FnMut(u8) -> bool,
{
    data.iter().take_while(|&&c| f(c)).count()
}
441
/// Counts the trailing bytes of `data` for which `f` returns `true`.
///
/// Bytes are examined from the end towards the start and scanning stops at
/// the first byte rejected by `f`.
pub(crate) fn scan_rev_while<F>(data: &[u8], mut f: F) -> usize
where
    F: FnMut(u8) -> bool,
{
    data.iter().rev().take_while(|&&c| f(c)).count()
}
448
/// Length of the run of bytes equal to `c` at the start of `data`.
pub(crate) fn scan_ch_repeat(data: &[u8], c: u8) -> usize {
    data.iter().take_while(|&&byte| byte == c).count()
}
452
453// Note: this scans ASCII whitespace only, for Unicode whitespace use
454// a different function.
455pub(crate) fn scan_whitespace_no_nl(data: &[u8]) -> usize {
456 scan_while(data, f:is_ascii_whitespace_no_nl)
457}
458
459fn scan_attr_value_chars(data: &[u8]) -> usize {
460 scan_while(data, f:is_valid_unquoted_attr_value_char)
461}
462
/// Scans a line ending at the start of `bytes`.
///
/// Returns the number of bytes in the line ending: 1 for `\n` or a lone
/// `\r`, 2 for `\r\n`. Empty input counts as an end of line of length 0.
/// Returns `None` when `bytes` starts with any other byte.
pub(crate) fn scan_eol(bytes: &[u8]) -> Option<usize> {
    match bytes {
        [] => Some(0),
        [b'\n', ..] => Some(1),
        [b'\r', b'\n', ..] => Some(2),
        [b'\r', ..] => Some(1),
        _ => None,
    }
}
473
474pub(crate) fn scan_blank_line(bytes: &[u8]) -> Option<usize> {
475 let i: usize = scan_whitespace_no_nl(data:bytes);
476 scan_eol(&bytes[i..]).map(|n: usize| i + n)
477}
478
479pub(crate) fn scan_nextline(bytes: &[u8]) -> usize {
480 memchr(b'\n', bytes).map_or(default:bytes.len(), |x: usize| x + 1)
481}
482
483// return: end byte for closing code fence, or None
484// if the line is not a closing code fence
485pub(crate) fn scan_closing_code_fence(
486 bytes: &[u8],
487 fence_char: u8,
488 n_fence_char: usize,
489) -> Option<usize> {
490 if bytes.is_empty() {
491 return Some(0);
492 }
493 let mut i: usize = 0;
494 let num_fence_chars_found: usize = scan_ch_repeat(&bytes[i..], c:fence_char);
495 if num_fence_chars_found < n_fence_char {
496 return None;
497 }
498 i += num_fence_chars_found;
499 let num_trailing_spaces: usize = scan_ch_repeat(&bytes[i..], c:b' ');
500 i += num_trailing_spaces;
501 scan_eol(&bytes[i..]).map(|_| i)
502}
503
504// return: end byte for closing metadata block, or None
505// if the line is not a closing metadata block
506pub(crate) fn scan_closing_metadata_block(bytes: &[u8], fence_char: u8) -> Option<usize> {
507 let mut i: usize = 0;
508 let mut num_fence_chars_found: usize = scan_ch_repeat(&bytes[i..], c:fence_char);
509 if num_fence_chars_found != 3 {
510 // if YAML style metadata block the closing character can also be `.`
511 if fence_char == b'-' {
512 num_fence_chars_found = scan_ch_repeat(&bytes[i..], c:b'.');
513 if num_fence_chars_found != 3 {
514 return None;
515 }
516 } else {
517 return None;
518 }
519 }
520 i += num_fence_chars_found;
521 let num_trailing_spaces: usize = scan_ch_repeat(&bytes[i..], c:b' ');
522 i += num_trailing_spaces;
523 scan_eol(&bytes[i..]).map(|_| i)
524}
525
// Measures the indentation at the start of `text`, stopping once `max`
// columns are reached (a tab that would overshoot `max` is not counted).
//
// The returned pair is (byte offset, number of spaces). The offset is the
// index of the last byte that was examined, or 0 for empty input.
pub(crate) fn calc_indent(text: &[u8], max: usize) -> (usize, usize) {
    let mut columns = 0;
    let mut last_examined = 0;
    let mut pos = 0;

    while let Some(&byte) = text.get(pos) {
        last_examined = pos;
        if byte == b' ' {
            columns += 1;
            if columns == max {
                break;
            }
        } else if byte == b'\t' {
            // A tab advances to the next multiple of four columns.
            let next_stop = columns - columns % 4 + 4;
            if next_stop > max {
                break;
            }
            columns = next_stop;
        } else {
            break;
        }
        pos += 1;
    }

    (last_examined, columns)
}
553
554/// Scan hrule opening sequence.
555///
556/// Returns Ok(x) when it finds an hrule, where x is the
557/// size of line containing the hrule, including the trailing newline.
558///
559/// Returns Err(x) when it does not find an hrule and x is
560/// the offset in data before no hrule can appear.
561pub(crate) fn scan_hrule(bytes: &[u8]) -> Result<usize, usize> {
562 if bytes.len() < 3 {
563 return Err(0);
564 }
565 let c = bytes[0];
566 if !(c == b'*' || c == b'-' || c == b'_') {
567 return Err(0);
568 }
569 let mut n = 0;
570 let mut i = 0;
571
572 while i < bytes.len() {
573 match bytes[i] {
574 b'\n' | b'\r' => {
575 i += scan_eol(&bytes[i..]).unwrap_or(0);
576 break;
577 }
578 c2 if c2 == c => {
579 n += 1;
580 }
581 b' ' | b'\t' => (),
582 _ => return Err(i),
583 }
584 i += 1;
585 }
586 if n >= 3 {
587 Ok(i)
588 } else {
589 Err(i)
590 }
591}
592
593/// Scan an ATX heading opening sequence.
594///
595/// Returns number of bytes in prefix and level.
596pub(crate) fn scan_atx_heading(data: &[u8]) -> Option<HeadingLevel> {
597 let level: usize = scan_ch_repeat(data, c:b'#');
598 if data.get(level).copied().map_or(default:true, f:is_ascii_whitespace) {
599 HeadingLevel::try_from(level).ok()
600 } else {
601 None
602 }
603}
604
605/// Scan a setext heading underline.
606///
607/// Returns number of bytes in line (including trailing newline) and level.
608pub(crate) fn scan_setext_heading(data: &[u8]) -> Option<(usize, HeadingLevel)> {
609 let c: u8 = *data.first()?;
610 let level: HeadingLevel = if c == b'=' {
611 HeadingLevel::H1
612 } else if c == b'-' {
613 HeadingLevel::H2
614 } else {
615 return None;
616 };
617 let mut i: usize = 1 + scan_ch_repeat(&data[1..], c);
618 i += scan_blank_line(&data[i..])?;
619 Some((i, level))
620}
621
622// returns number of bytes in line (including trailing
623// newline) and column alignments
624pub(crate) fn scan_table_head(data: &[u8]) -> (usize, Vec<Alignment>) {
625 let (mut i, spaces) = calc_indent(data, 4);
626 if spaces > 3 || i == data.len() {
627 return (0, vec![]);
628 }
629 let mut cols = vec![];
630 let mut active_col = Alignment::None;
631 let mut start_col = true;
632 let mut found_pipe = false;
633 let mut found_hyphen = false;
634 let mut found_hyphen_in_col = false;
635 if data[i] == b'|' {
636 i += 1;
637 found_pipe = true;
638 }
639 for c in &data[i..] {
640 if let Some(n) = scan_eol(&data[i..]) {
641 i += n;
642 break;
643 }
644 match *c {
645 b' ' => (),
646 b':' => {
647 active_col = match (start_col, active_col) {
648 (true, Alignment::None) => Alignment::Left,
649 (false, Alignment::Left) => Alignment::Center,
650 (false, Alignment::None) => Alignment::Right,
651 _ => active_col,
652 };
653 start_col = false;
654 }
655 b'-' => {
656 start_col = false;
657 found_hyphen = true;
658 found_hyphen_in_col = true;
659 }
660 b'|' => {
661 start_col = true;
662 found_pipe = true;
663 cols.push(active_col);
664 active_col = Alignment::None;
665 if !found_hyphen_in_col {
666 // It isn't a table head if it has back-to-back pipes.
667 return (0, vec![]);
668 }
669 found_hyphen_in_col = false;
670 }
671 _ => {
672 // It isn't a table head if it has characters outside the allowed set.
673 return (0, vec![]);
674 }
675 }
676 i += 1;
677 }
678
679 if !start_col {
680 cols.push(active_col);
681 }
682 if !found_pipe || !found_hyphen {
683 // It isn't a table head if it doesn't have a least one pipe or hyphen.
684 // It's a list, a header, or a thematic break.
685 return (0, vec![]);
686 }
687
688 (i, cols)
689}
690
691/// Scan code fence.
692///
693/// Returns number of bytes scanned and the char that is repeated to make the code fence.
694pub(crate) fn scan_code_fence(data: &[u8]) -> Option<(usize, u8)> {
695 let c: u8 = *data.first()?;
696 if !(c == b'`' || c == b'~') {
697 return None;
698 }
699 let i: usize = 1 + scan_ch_repeat(&data[1..], c);
700 if i >= 3 {
701 if c == b'`' {
702 let suffix: &[u8] = &data[i..];
703 let next_line: usize = i + scan_nextline(bytes:suffix);
704 // FIXME: make sure this is correct
705 if suffix[..(next_line - i)].iter().any(|&b: u8| b == b'`') {
706 return None;
707 }
708 }
709 Some((i, c))
710 } else {
711 None
712 }
713}
714
715/// Scan metadata block, returning the number of delimiter bytes
716/// (always 3 for now) and the delimiter character.
717///
718/// Differently to code blocks, metadata blocks must be closed with the closing
719/// sequence not being a valid terminator the end of the file.
720///
721/// In addition, they cannot be empty (closing sequence in the next line) and
722/// the next line cannot be an empty line.
723pub(crate) fn scan_metadata_block(
724 data: &[u8],
725 yaml_style_enabled: bool,
726 pluses_style_enabled: bool,
727) -> Option<(usize, u8)> {
728 // Only if metadata blocks are enabled
729 if yaml_style_enabled || pluses_style_enabled {
730 let c = *data.first()?;
731 if !((c == b'-' && yaml_style_enabled) || (c == b'+' && pluses_style_enabled)) {
732 return None;
733 }
734 let i = 1 + scan_ch_repeat(&data[1..], c);
735 // Only trailing spaces after the delimiters in the line
736 let next_line = scan_nextline(&data[i..]);
737 for c in &data[i..i + next_line] {
738 if !c.is_ascii_whitespace() {
739 return None;
740 }
741 }
742 if i == 3 {
743 // Search the closing sequence
744 let mut j = i;
745 let mut first_line = true;
746 while j < data.len() {
747 j += scan_nextline(&data[j..]);
748 let closed = scan_closing_metadata_block(&data[j..], c).is_some();
749 // The first line of the metadata block cannot be an empty line
750 // nor the end of the block
751 if first_line {
752 if closed || scan_blank_line(&data[j..]).is_some() {
753 return None;
754 }
755 first_line = false;
756 }
757 if closed {
758 return Some((i, c));
759 }
760 }
761 None
762 } else {
763 None
764 }
765 } else {
766 None
767 }
768}
769
/// Scans a block quote marker at the very start of `data`.
///
/// Returns the number of bytes in the marker: 2 for `>` followed by a
/// space, 1 for a bare `>`, and `None` when `data` does not start with `>`.
pub(crate) fn scan_blockquote_start(data: &[u8]) -> Option<usize> {
    match data {
        [b'>', b' ', ..] => Some(2),
        [b'>', ..] => Some(1),
        _ => None,
    }
}
782
783/// return number of bytes scanned, delimiter, start index, and indent
784pub(crate) fn scan_listitem(bytes: &[u8]) -> Option<(usize, u8, usize, usize)> {
785 let mut c = *bytes.first()?;
786 let (w, start) = match c {
787 b'-' | b'+' | b'*' => (1, 0),
788 b'0'..=b'9' => {
789 let (length, start) = parse_decimal(bytes, 9);
790 c = *bytes.get(length)?;
791 if !(c == b'.' || c == b')') {
792 return None;
793 }
794 (length + 1, start)
795 }
796 _ => {
797 return None;
798 }
799 };
800 // TODO: replace calc_indent with scan_leading_whitespace, for tab correctness
801 let (mut postn, mut postindent) = calc_indent(&bytes[w..], 5);
802 if postindent == 0 {
803 scan_eol(&bytes[w..])?;
804 postindent += 1;
805 } else if postindent > 4 {
806 postn = 1;
807 postindent = 1;
808 }
809 if scan_blank_line(&bytes[w..]).is_some() {
810 postn = 0;
811 postindent = 1;
812 }
813 Some((w + postn, c, start, w + postindent))
814}
815
// Parses the run of ASCII decimal digits at the start of `bytes`, looking
// at no more than `limit` bytes.
//
// returns (number of bytes, parsed decimal); (0, 0) when `bytes` does not
// start with a digit. Parsing stops early, keeping the value accumulated
// so far, if the next digit would overflow `usize`.
fn parse_decimal(bytes: &[u8], limit: usize) -> (usize, usize) {
    match bytes
        .iter()
        .take(limit)
        .take_while(|&&b| b.is_ascii_digit())
        .try_fold((0, 0usize), |(count, acc), c| {
            let digit = usize::from(c - b'0');
            match acc
                .checked_mul(10)
                .and_then(|ten_acc| ten_acc.checked_add(digit))
            {
                Some(number) => Ok((count + 1, number)),
                // stop early on overflow
                None => Err((count, acc)),
            }
        }) {
        Ok(p) | Err(p) => p,
    }
}
836
// Parses the run of hexadecimal digits (either letter case) at the start of
// `bytes`, looking at no more than `limit` bytes.
//
// returns (number of bytes, parsed hex). Parsing stops at the first
// non-hex byte, or early if the next digit would overflow `usize`.
fn parse_hex(bytes: &[u8], limit: usize) -> (usize, usize) {
    let mut value = 0usize;
    let mut count = 0;

    for &byte in bytes.iter().take(limit) {
        let digit = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => break,
        };
        let widened = value
            .checked_mul(16)
            .and_then(|shifted| shifted.checked_add(usize::from(digit)));
        match widened {
            Some(next) => {
                value = next;
                count += 1;
            }
            // stop early on overflow
            None => break,
        }
    }

    (count, value)
}
867
/// Converts a numeric code point into a `char`.
///
/// Yields `None` for zero, for values that do not fit in a `u32`, and for
/// values that `char::from_u32` rejects.
fn char_from_codepoint(input: usize) -> Option<char> {
    match u32::try_from(input) {
        Ok(0) | Err(_) => None,
        Ok(codepoint) => char::from_u32(codepoint),
    }
}
875
876// doesn't bother to check data[0] == '&'
877pub(crate) fn scan_entity(bytes: &[u8]) -> (usize, Option<CowStr<'static>>) {
878 let mut end = 1;
879 if scan_ch(&bytes[end..], b'#') == 1 {
880 end += 1;
881 let (bytecount, codepoint) = if end < bytes.len() && bytes[end] | 0x20 == b'x' {
882 end += 1;
883 parse_hex(&bytes[end..], 6)
884 } else {
885 parse_decimal(&bytes[end..], 7)
886 };
887 end += bytecount;
888 return if bytecount == 0 || scan_ch(&bytes[end..], b';') == 0 {
889 (0, None)
890 } else {
891 (
892 end + 1,
893 Some(char_from_codepoint(codepoint).unwrap_or('\u{FFFD}').into()),
894 )
895 };
896 }
897 end += scan_while(&bytes[end..], is_ascii_alphanumeric);
898 if scan_ch(&bytes[end..], b';') == 1 {
899 if let Some(value) = entities::get_entity(&bytes[1..end]) {
900 return (end + 1, Some(value.into()));
901 }
902 }
903 (0, None)
904}
905
906// note: dest returned is raw, still needs to be unescaped
907// TODO: check that nested parens are really not allowed for refdefs
908// TODO(performance): this func should probably its own unescaping
909pub(crate) fn scan_link_dest(
910 data: &str,
911 start_ix: usize,
912 max_next: usize,
913) -> Option<(usize, &str)> {
914 let bytes = &data.as_bytes()[start_ix..];
915 let mut i = scan_ch(bytes, b'<');
916
917 if i != 0 {
918 // pointy links
919 while i < bytes.len() {
920 match bytes[i] {
921 b'\n' | b'\r' | b'<' => return None,
922 b'>' => return Some((i + 1, &data[(start_ix + 1)..(start_ix + i)])),
923 b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
924 i += 1;
925 }
926 _ => {}
927 }
928 i += 1;
929 }
930 None
931 } else {
932 // non-pointy links
933 let mut nest = 0;
934 while i < bytes.len() {
935 match bytes[i] {
936 0x0..=0x20 => {
937 break;
938 }
939 b'(' => {
940 if nest > max_next {
941 return None;
942 }
943 nest += 1;
944 }
945 b')' => {
946 if nest == 0 {
947 break;
948 }
949 nest -= 1;
950 }
951 b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
952 i += 1;
953 }
954 _ => {}
955 }
956 i += 1;
957 }
958 if nest != 0 {
959 return None;
960 }
961 Some((i, &data[start_ix..(start_ix + i)]))
962 }
963}
964
965/// Returns bytes scanned
966fn scan_attribute_name(data: &[u8]) -> Option<usize> {
967 let (&c: u8, tail: &[u8]) = data.split_first()?;
968 if is_ascii_alpha(c) || c == b'_' || c == b':' {
969 Some(
970 1 + scan_while(data:tail, |c: u8| {
971 is_ascii_alphanumeric(c) || c == b'_' || c == b'.' || c == b':' || c == b'-'
972 }),
973 )
974 } else {
975 None
976 }
977}
978
979/// Returns the index immediately following the attribute on success.
980/// The argument `buffer_ix` refers to the index into `data` from which we
981/// should copy into `buffer` when we find bytes to skip.
982fn scan_attribute(
983 data: &[u8],
984 mut ix: usize,
985 newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
986 buffer: &mut Vec<u8>,
987 buffer_ix: &mut usize,
988) -> Option<usize> {
989 ix += scan_attribute_name(&data[ix..])?;
990 let ix_after_attribute: usize = ix;
991 ix = scan_whitespace_with_newline_handler_without_buffer(data, i:ix, newline_handler)?;
992 if scan_ch(&data[ix..], c:b'=') == 1 {
993 ix = scan_whitespace_with_newline_handler(data, i:ix_after_attribute, newline_handler, buffer, buffer_ix)?;
994 ix += 1;
995 ix = scan_whitespace_with_newline_handler(data, i:ix, newline_handler, buffer, buffer_ix)?;
996 ix = scan_attribute_value(data, i:ix, newline_handler, buffer, buffer_ix)?;
997 Some(ix)
998 } else {
999 // Leave whitespace for next attribute.
1000 Some(ix_after_attribute)
1001 }
1002}
1003
1004/// Scans whitespace and possibly newlines according to the
1005/// behavior defined by the newline handler. When bytes are skipped,
1006/// all preceding non-skipped bytes are pushed to the buffer.
1007fn scan_whitespace_with_newline_handler(
1008 data: &[u8],
1009 mut i: usize,
1010 newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1011 buffer: &mut Vec<u8>,
1012 buffer_ix: &mut usize,
1013) -> Option<usize> {
1014 while i < data.len() {
1015 if !is_ascii_whitespace(data[i]) {
1016 return Some(i);
1017 }
1018 if let Some(eol_bytes: usize) = scan_eol(&data[i..]) {
1019 let handler: &dyn Fn(&[u8]) -> usize = newline_handler?;
1020 i += eol_bytes;
1021 let skipped_bytes: usize = handler(&data[i..]);
1022
1023 if skipped_bytes > 0 {
1024 buffer.extend(&data[*buffer_ix..i]);
1025 *buffer_ix = i + skipped_bytes;
1026 }
1027
1028 i += skipped_bytes;
1029 } else {
1030 i += 1;
1031 }
1032 }
1033
1034 Some(i)
1035}
1036
1037/// Scans whitespace and possible newlines according to the behavior defined
1038/// by the newline handler.
1039///
1040/// Unlike [`scan_whitespace_with_newline_handler`], this function doesn't
1041/// copy skipped data into a buffer. Typically, if this function
1042/// returns `Some`, a call to `scan_whitespace_with_newline_handler` will
1043/// soon follow.
1044fn scan_whitespace_with_newline_handler_without_buffer(
1045 data: &[u8],
1046 mut i: usize,
1047 newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1048) -> Option<usize> {
1049 while i < data.len() {
1050 if !is_ascii_whitespace(data[i]) {
1051 return Some(i);
1052 }
1053 if let Some(eol_bytes: usize) = scan_eol(&data[i..]) {
1054 let handler: &dyn Fn(&[u8]) -> usize = newline_handler?;
1055 i += eol_bytes;
1056 let skipped_bytes: usize = handler(&data[i..]);
1057 i += skipped_bytes;
1058 } else {
1059 i += 1;
1060 }
1061 }
1062
1063 Some(i)
1064}
1065
1066/// Returns the index immediately following the attribute value on success.
1067fn scan_attribute_value(
1068 data: &[u8],
1069 mut i: usize,
1070 newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1071 buffer: &mut Vec<u8>,
1072 buffer_ix: &mut usize,
1073) -> Option<usize> {
1074 match *data.get(i)? {
1075 b @ b'"' | b @ b'\'' => {
1076 i += 1;
1077 while i < data.len() {
1078 if data[i] == b {
1079 return Some(i + 1);
1080 }
1081 if let Some(eol_bytes) = scan_eol(&data[i..]) {
1082 let handler = newline_handler?;
1083 i += eol_bytes;
1084 let skipped_bytes = handler(&data[i..]);
1085
1086 if skipped_bytes > 0 {
1087 buffer.extend(&data[*buffer_ix..i]);
1088 *buffer_ix = i + skipped_bytes;
1089 }
1090 i += skipped_bytes;
1091 } else {
1092 i += 1;
1093 }
1094 }
1095 return None;
1096 }
1097 b' ' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r' => {
1098 return None;
1099 }
1100 _ => {
1101 // unquoted attribute value
1102 i += scan_attr_value_chars(&data[i..]);
1103 }
1104 }
1105
1106 Some(i)
1107}
1108
1109// Remove backslash escapes and resolve entities
1110pub(crate) fn unescape<'a, I: Into<CowStr<'a>>>(input: I, is_in_table: bool) -> CowStr<'a> {
1111 let input = input.into();
1112 let mut result = String::new();
1113 let mut mark = 0;
1114 let mut i = 0;
1115 let bytes = input.as_bytes();
1116 while i < bytes.len() {
1117 match bytes[i] {
1118 // Tables are special, because they're parsed as-if the tables
1119 // were parsed in a discrete pass, changing `\|` to `|`, and then
1120 // passing the changed string to the inline parser.
1121 b'\\'
1122 if is_in_table
1123 && i + 2 < bytes.len()
1124 && bytes[i + 1] == b'\\'
1125 && bytes[i + 2] == b'|' =>
1126 {
1127 // even number of `\`s before pipe
1128 // odd number is handled in the normal way below
1129 result.push_str(&input[mark..i]);
1130 mark = i + 2;
1131 i += 3;
1132 }
1133 b'\\' if i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) => {
1134 result.push_str(&input[mark..i]);
1135 mark = i + 1;
1136 i += 2;
1137 }
1138 b'&' => match scan_entity(&bytes[i..]) {
1139 (n, Some(value)) => {
1140 result.push_str(&input[mark..i]);
1141 result.push_str(&value);
1142 i += n;
1143 mark = i;
1144 }
1145 _ => i += 1,
1146 },
1147 b'\r' => {
1148 result.push_str(&input[mark..i]);
1149 i += 1;
1150 mark = i;
1151 }
1152 _ => i += 1,
1153 }
1154 }
1155 if mark == 0 {
1156 input
1157 } else {
1158 result.push_str(&input[mark..]);
1159 result.into()
1160 }
1161}
1162
1163/// Assumes `data` is preceded by `<`.
1164pub(crate) fn starts_html_block_type_6(data: &[u8]) -> bool {
1165 let i: usize = scan_ch(data, c:b'/');
1166 let tail: &[u8] = &data[i..];
1167 let n: usize = scan_while(data:tail, f:is_ascii_alphanumeric);
1168 if !is_html_tag(&tail[..n]) {
1169 return false;
1170 }
1171 // Starting condition says the next byte must be either a space, a tab,
1172 // the end of the line, the string >, or the string />
1173 let tail: &[u8] = &tail[n..];
1174 tail.is_empty()
1175 || tail[0] == b' '
1176 || tail[0] == b'\t'
1177 || tail[0] == b'\r'
1178 || tail[0] == b'\n'
1179 || tail[0] == b'>'
1180 || tail.len() >= 2 && &tail[..2] == b"/>"
1181}
1182
1183fn is_html_tag(tag: &[u8]) -> bool {
1184 HTML_TAGS
1185 .binary_search_by(|probe: &&str| {
1186 let probe_bytes_iter: Iter<'_, u8> = probe.as_bytes().iter();
1187 let tag_bytes_iter: Iter<'_, u8> = tag.iter();
1188
1189 probe_bytes_iterOption
1190 .zip(tag_bytes_iter)
1191 .find_map(|(&a: u8, &b: u8)| {
1192 // We can compare case insensitively because the probes are
1193 // all lower case alpha strings.
1194 match a.cmp(&(b | 0x20)) {
1195 std::cmp::Ordering::Equal => None,
1196 inequality: Ordering => Some(inequality),
1197 }
1198 })
1199 .unwrap_or_else(|| probe.len().cmp(&tag.len()))
1200 })
1201 .is_ok()
1202}
1203
1204/// Assumes that `data` starts with `<`.
1205/// Returns the index into data directly after the html tag on success.
1206pub(crate) fn scan_html_type_7(data: &[u8]) -> Option<usize> {
1207 // Block type html does not allow for newlines, so we
1208 // do not pass a newline handler.
1209 let (_span: Vec, i: usize) = scan_html_block_inner(data, newline_handler:None)?;
1210 scan_blank_line(&data[i..])?;
1211 Some(i)
1212}
1213
1214/// Assumes that `data` starts with `<`.
1215/// Returns the number of bytes scanned and the html in case of
1216/// success.
1217/// When some bytes were skipped, because the html was split over
1218/// multiple leafs (e.g. over multiple lines in a blockquote),
1219/// the html is returned as a vector of bytes.
1220/// If no bytes were skipped, the buffer will be empty.
1221pub(crate) fn scan_html_block_inner(
1222 data: &[u8],
1223 newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1224) -> Option<(Vec<u8>, usize)> {
1225 let mut buffer = Vec::new();
1226 let mut last_buf_index = 0;
1227
1228 let close_tag_bytes = scan_ch(&data[1..], b'/');
1229 let l = scan_while(&data[(1 + close_tag_bytes)..], is_ascii_alpha);
1230 if l == 0 {
1231 return None;
1232 }
1233 let mut i = 1 + close_tag_bytes + l;
1234 i += scan_while(&data[i..], is_ascii_letterdigitdash);
1235
1236 if close_tag_bytes == 0 {
1237 loop {
1238 let old_i = i;
1239 loop {
1240 i += scan_whitespace_no_nl(&data[i..]);
1241 if let Some(eol_bytes) = scan_eol(&data[i..]) {
1242 if eol_bytes == 0 {
1243 return None;
1244 }
1245 let handler = newline_handler?;
1246 i += eol_bytes;
1247 let skipped_bytes = handler(&data[i..]);
1248
1249 let data_len = data.len() - i;
1250
1251 debug_assert!(
1252 skipped_bytes <= data_len,
1253 "Handler tried to skip too many bytes, fed {}, skipped {}",
1254 data_len,
1255 skipped_bytes
1256 );
1257
1258 if skipped_bytes > 0 {
1259 buffer.extend(&data[last_buf_index..i]);
1260 i += skipped_bytes;
1261 last_buf_index = i;
1262 }
1263 } else {
1264 break;
1265 }
1266 }
1267 if let Some(b'/') | Some(b'>') = data.get(i) {
1268 break;
1269 }
1270 if old_i == i {
1271 // No whitespace, which is mandatory.
1272 return None;
1273 }
1274 i = scan_attribute(data, i, newline_handler, &mut buffer, &mut last_buf_index)?;
1275 }
1276 }
1277
1278 i += scan_whitespace_no_nl(&data[i..]);
1279
1280 if close_tag_bytes == 0 {
1281 i += scan_ch(&data[i..], b'/');
1282 }
1283
1284 if scan_ch(&data[i..], b'>') == 0 {
1285 None
1286 } else {
1287 i += 1;
1288 if !buffer.is_empty() {
1289 buffer.extend(&data[last_buf_index..i]);
1290 }
1291 Some((buffer, i))
1292 }
1293}
1294
1295/// Returns (next_byte_offset, uri, type)
1296pub(crate) fn scan_autolink(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>, LinkType)> {
1297 scan_uriOption<(usize, CowStr<'_>, …)>(text, start_ix)
1298 .map(|(bytes: usize, uri: CowStr<'_>)| (bytes, uri, LinkType::Autolink))
1299 .or_else(|| scan_email(text, start_ix).map(|(bytes: usize, uri: CowStr<'_>)| (bytes, uri, LinkType::Email)))
1300}
1301
1302/// Returns (next_byte_offset, uri)
1303fn scan_uri(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
1304 let bytes = &text.as_bytes()[start_ix..];
1305
1306 // scheme's first byte must be an ascii letter
1307 if bytes.is_empty() || !is_ascii_alpha(bytes[0]) {
1308 return None;
1309 }
1310
1311 let mut i = 1;
1312
1313 while i < bytes.len() {
1314 let c = bytes[i];
1315 i += 1;
1316 match c {
1317 c if is_ascii_alphanumeric(c) => (),
1318 b'.' | b'-' | b'+' => (),
1319 b':' => break,
1320 _ => return None,
1321 }
1322 }
1323
1324 // scheme length must be between 2 and 32 characters long. scheme
1325 // must be followed by colon
1326 if !(3..=33).contains(&i) {
1327 return None;
1328 }
1329
1330 while i < bytes.len() {
1331 match bytes[i] {
1332 b'>' => return Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into())),
1333 b'\0'..=b' ' | b'<' => return None,
1334 _ => (),
1335 }
1336 i += 1;
1337 }
1338
1339 None
1340}
1341
1342/// Returns (next_byte_offset, email)
1343fn scan_email(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
1344 // using a regex library would be convenient, but doing it by hand is not too bad
1345 let bytes = &text.as_bytes()[start_ix..];
1346 let mut i = 0;
1347
1348 while i < bytes.len() {
1349 let c = bytes[i];
1350 i += 1;
1351 match c {
1352 c if is_ascii_alphanumeric(c) => (),
1353 b'.' | b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'/' | b'=' | b'?'
1354 | b'^' | b'_' | b'`' | b'{' | b'|' | b'}' | b'~' | b'-' => (),
1355 b'@' if i > 1 => break,
1356 _ => return None,
1357 }
1358 }
1359
1360 loop {
1361 let label_start_ix = i;
1362 let mut fresh_label = true;
1363
1364 while i < bytes.len() {
1365 match bytes[i] {
1366 c if is_ascii_alphanumeric(c) => (),
1367 b'-' if fresh_label => {
1368 return None;
1369 }
1370 b'-' => (),
1371 _ => break,
1372 }
1373 fresh_label = false;
1374 i += 1;
1375 }
1376
1377 if i == label_start_ix || i - label_start_ix > 63 || bytes[i - 1] == b'-' {
1378 return None;
1379 }
1380
1381 if scan_ch(&bytes[i..], b'.') == 0 {
1382 break;
1383 }
1384 i += 1;
1385 }
1386
1387 if scan_ch(&bytes[i..], b'>') == 0 {
1388 return None;
1389 }
1390
1391 Some((start_ix + i + 1, text[start_ix..(start_ix + i)].into()))
1392}
1393
1394/// Scan comment, declaration, or CDATA section, with initial "<!" already consumed.
1395/// Returns byte offset on match.
1396pub(crate) fn scan_inline_html_comment(
1397 bytes: &[u8],
1398 mut ix: usize,
1399 scan_guard: &mut HtmlScanGuard,
1400) -> Option<usize> {
1401 let c = *bytes.get(ix)?;
1402 ix += 1;
1403 match c {
1404 // An HTML comment consists of `<!-->`, `<!--->`, or `<!--`, a string of characters not
1405 // including the string `-->`, and `-->`.
1406 b'-' if ix > scan_guard.comment => {
1407 // HTML comment needs two hyphens after the !.
1408 if *bytes.get(ix)? != b'-' {
1409 return None;
1410 }
1411 // Yes, we're intentionally going backwards.
1412 // We want the cursor to point here:
1413 //
1414 // <!--
1415 // ^
1416 //
1417 // This way, the `<!-->` case is covered by the loop below.
1418 ix -= 1;
1419
1420 while let Some(x) = memchr(b'-', &bytes[ix..]) {
1421 ix += x + 1;
1422 scan_guard.comment = ix;
1423 if scan_ch(&bytes[ix..], b'-') == 1 && scan_ch(&bytes[ix + 1..], b'>') == 1 {
1424 return Some(ix + 2);
1425 }
1426 }
1427 None
1428 }
1429 // A CDATA section consists of the string `<![CDATA[`, a string of characters not
1430 // including the string `]]>`, and the string `]]>`.
1431 b'[' if bytes[ix..].starts_with(b"CDATA[") && ix > scan_guard.cdata => {
1432 ix += b"CDATA[".len();
1433 ix = memchr(b']', &bytes[ix..]).map_or(bytes.len(), |x| ix + x);
1434 let close_brackets = scan_ch_repeat(&bytes[ix..], b']');
1435 ix += close_brackets;
1436
1437 if close_brackets == 0 || scan_ch(&bytes[ix..], b'>') == 0 {
1438 scan_guard.cdata = ix;
1439 None
1440 } else {
1441 Some(ix + 1)
1442 }
1443 }
1444 // A declaration consists of the string `<!`, an ASCII letter, zero or more characters not
1445 // including the character >, and the character >.
1446 _ if c.is_ascii_alphabetic() && ix > scan_guard.declaration => {
1447 ix = memchr(b'>', &bytes[ix..]).map_or(bytes.len(), |x| ix + x);
1448 if scan_ch(&bytes[ix..], b'>') == 0 {
1449 scan_guard.declaration = ix;
1450 None
1451 } else {
1452 Some(ix + 1)
1453 }
1454 }
1455 _ => None,
1456 }
1457}
1458
1459/// Scan processing directive, with initial "<?" already consumed.
1460/// Returns the next byte offset on success.
1461pub(crate) fn scan_inline_html_processing(
1462 bytes: &[u8],
1463 mut ix: usize,
1464 scan_guard: &mut HtmlScanGuard,
1465) -> Option<usize> {
1466 if ix <= scan_guard.processing {
1467 return None;
1468 }
1469 while let Some(offset: usize) = memchr(needle:b'?', &bytes[ix..]) {
1470 ix += offset + 1;
1471 if scan_ch(&bytes[ix..], c:b'>') == 1 {
1472 return Some(ix + 1);
1473 }
1474 }
1475 scan_guard.processing = ix;
1476 None
1477}
1478
#[cfg(test)]
mod test {
    use super::*;

    /// A list marker whose digit run is far too long must be rejected.
    #[test]
    fn overflow_list() {
        assert!(
            scan_listitem(b"4444444444444444444444444444444444444444444444444444444444!").is_none()
        );
    }

    /// A list marker number slightly above the 64-bit unsigned range must be
    /// rejected as well.
    #[test]
    fn overflow_by_addition() {
        assert!(scan_listitem(b"1844674407370955161615!").is_none());
    }

    /// Inputs that `scan_email` must accept; scanning starts after the `<`.
    #[test]
    fn good_emails() {
        const EMAILS: &[&str] = &[
            "<a@b.c>",
            "<a@b>",
            "<a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-@example.com>",
            "<a@sixty-three-letters-in-this-identifier-----------------------63>",
        ];
        for email in EMAILS {
            // Name the offending input so a failure is actionable.
            assert!(
                scan_email(email, 1).is_some(),
                "{:?} should be accepted as an email autolink",
                email
            );
        }
    }

    /// Inputs that `scan_email` must reject; scanning starts after the `<`.
    #[test]
    fn bad_emails() {
        const EMAILS: &[&str] = &[
            "<@b.c>",
            "<foo@-example.com>",
            "<foo@example-.com>",
            "<a@notrailingperiod.>",
            "<a(noparens)@example.com>",
            "<\"noquotes\"@example.com>",
            "<a@sixty-four-letters-in-this-identifier-------------------------64>",
        ];
        for email in EMAILS {
            // Name the offending input so a failure is actionable.
            assert!(
                scan_email(email, 1).is_none(),
                "{:?} should be rejected as an email autolink",
                email
            );
        }
    }
}
1523