scanners.rs source code [crates/pulldown_cmark/src/scanners.rs]

1	// Copyright 2015 Google Inc. All rights reserved.
2	//
3	// Permission is hereby granted, free of charge, to any person obtaining a copy
4	// of this software and associated documentation files (the "Software"), to deal
5	// in the Software without restriction, including without limitation the rights
6	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	// copies of the Software, and to permit persons to whom the Software is
8	// furnished to do so, subject to the following conditions:
9	//
10	// The above copyright notice and this permission notice shall be included in
11	// all copies or substantial portions of the Software.
12	//
13	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	// THE SOFTWARE.
20
21	//! Scanners for fragments of CommonMark syntax
22
23	use std::char;
24
25	use crate::parse::HtmlScanGuard;
26	pub(crate) use crate::puncttable::{is_ascii_punctuation, is_punctuation};
27	use crate::strings::CowStr;
28	use crate::{entities, BlockQuoteKind, HeadingLevel};
29	use crate::{Alignment, LinkType};
30
31	use memchr::memchr;
32
// Lowercase HTML element names looked up by `is_html_tag`.
//
// The entries must stay in ascending byte order: the lookup is a binary
// search, which silently misses entries in an unsorted table.
#[rustfmt::skip]
const HTML_TAGS: [&str; 62] = [
    "address", "article", "aside",
    "base", "basefont", "blockquote", "body",
    "caption", "center", "col", "colgroup",
    "dd", "details", "dialog", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "iframe",
    "legend", "li", "link",
    "main", "menu", "menuitem",
    "nav", "noframes",
    "ol", "optgroup", "option",
    "p", "param",
    "search", "section", "summary",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
    "ul",
];
98
99	/// Analysis of the beginning of a line, including indentation and container
100	/// markers.
101	#[derive(Clone)]
102	pub(crate) struct LineStart<'a> {
103	bytes: &'a [u8],
104	ix: usize,
105
106	// The index in `bytes` after the last tab we scanned; initially
107	// zero.
108	//
109	// Thus, there are no tab characters between `ix` and here, and for
110	// the purpose of defining block structure, this position can be
111	// considered to fall on a tab stop.
112	//
113	// This is only valid while scanning the initial portion of the
114	// line; methods that work with interior structure don't bother to
115	// update it.
116	tab_start: usize,
117
118	// In contexts where spaces help to define block structure, tabs
119	// behave as if they were replaced by spaces with a tab stop of 4
120	// characters.
121	//
122	// If we have scanned past a tab character but not consumed all
123	// the horizontal width it contributed, this is the number of
124	// spaces logically remaining, before the character at `ix`.
125	spaces_remaining: usize,
126
127	// no thematic breaks can occur before this offset.
128	// this prevents scanning over and over up to a certain point
129	min_hrule_offset: usize,
130	}
131
132	impl<'a> LineStart<'a> {
133	pub(crate) fn new(bytes: &[u8]) -> LineStart<'_> {
134	LineStart {
135	bytes,
136	tab_start: `0`,
137	ix: `0`,
138	spaces_remaining: `0`,
139	min_hrule_offset: `0`,
140	}
141	}
142
143	/// Try to scan a number of spaces.
144	///
145	/// Returns true if all spaces were consumed.
146	///
147	/// Note: consumes some spaces even if not successful.
148	pub(crate) fn scan_space(&mut self, n_space: usize) -> bool {
149	self.scan_space_inner(n_space) == `0`
150	}
151
152	/// Scan a number of spaces up to a maximum.
153	///
154	/// Returns number of spaces scanned.
155	pub(crate) fn scan_space_upto(&mut self, n_space: usize) -> usize {
156	n_space - self.scan_space_inner(n_space)
157	}
158
159	/// Returns unused remainder of spaces.
160	fn scan_space_inner(&mut self, mut n_space: usize) -> usize {
161	// Consume any common prefix between the number of spaces we
162	// want and the number of unscanned tab-introduced spaces.
163	let n_from_remaining = self.spaces_remaining.min(n_space);
164	self.spaces_remaining -= n_from_remaining;
165	n_space -= n_from_remaining;
166
167	while n_space > `0` && self.ix < self.bytes.len() {
168	match self.bytes[self.ix] {
169	b' ' => {
170	self.ix += `1`;
171	n_space -= `1`;
172	}
173	b'`\t`' => {
174	let spaces = `4` - (self.ix - self.tab_start) % `4`;
175	self.ix += `1`;
176	self.tab_start = self.ix;
177	let n = spaces.min(n_space);
178	n_space -= n;
179
180	// Record the unscanned portion of the tab.
181	self.spaces_remaining = spaces - n;
182	}
183	_ => break,
184	}
185	}
186	n_space
187	}
188
189	/// Scan all available ASCII whitespace (not including eol).
190	pub(crate) fn scan_all_space(&mut self) {
191	self.spaces_remaining = `0`;
192	self.ix += self.bytes[self.ix..]
193	.iter()
194	.take_while(\|&&b\| b == b' ' \|\| b == b'`\t`')
195	.count();
196	}
197
198	/// Determine whether we're at end of line (includes end of file).
199	pub(crate) fn is_at_eol(&self) -> bool {
200	self.bytes
201	.get(self.ix)
202	.map(\|&c\| c == b'`\r`' \|\| c == b'`\n`')
203	.unwrap_or(`true`)
204	}
205
206	fn scan_ch(&mut self, c: u8) -> bool {
207	if self.ix < self.bytes.len() && self.bytes[self.ix] == c {
208	self.ix += `1`;
209	`true`
210	} else {
211	`false`
212	}
213	}
214
215	fn scan_case_insensitive(&mut self, tag: &[u8]) -> bool {
216	if self.bytes.len() - self.ix < tag.len() {
217	return `false`;
218	}
219	let prefix = &self.bytes[self.ix..self.ix + tag.len()];
220	let ok = prefix.eq_ignore_ascii_case(tag);
221	if ok {
222	self.ix += tag.len();
223	}
224	ok
225	}
226
227	pub(crate) fn scan_blockquote_tag(&mut self) -> Option<BlockQuoteKind> {
228	let saved_ix = self.ix;
229	let tag = if self.scan_ch(b'[') && self.scan_ch(b'!') {
230	let tag = if self.scan_case_insensitive(b"note") {
231	Some(BlockQuoteKind::Note)
232	} else if self.scan_case_insensitive(b"tip") {
233	Some(BlockQuoteKind::Tip)
234	} else if self.scan_case_insensitive(b"important") {
235	Some(BlockQuoteKind::Important)
236	} else if self.scan_case_insensitive(b"warning") {
237	Some(BlockQuoteKind::Warning)
238	} else if self.scan_case_insensitive(b"caution") {
239	Some(BlockQuoteKind::Caution)
240	} else {
241	None
242	};
243	if tag.is_some() && self.scan_ch(b']') {
244	if let Some(nl) = scan_blank_line(&self.bytes[self.ix..]) {
245	self.ix += nl;
246	tag
247	} else {
248	None
249	}
250	} else {
251	None
252	}
253	} else {
254	None
255	};
256	if tag.is_none() {
257	self.ix = saved_ix;
258	}
259	tag
260	}
261
262	pub(crate) fn scan_blockquote_marker(&mut self) -> bool {
263	let save = self.clone();
264	let _ = self.scan_space(`3`);
265	if self.scan_ch(b'>') {
266	let _ = self.scan_space(`1`);
267	`true`
268	} else {
269	*self = save;
270	`false`
271	}
272	}
273
274	/// Scan a list marker.
275	///
276	/// Return value is the character, the start index, and the indent in spaces.
277	/// For ordered list markers, the character will be one of b'.' or b')'. For
278	/// bullet list markers, it will be one of b'-', b'+', or b''.*
279	pub(crate) fn scan_list_marker(&mut self) -> Option<(u8, u64, usize)> {
280	let save = self.clone();
281	let indent = self.scan_space_upto(`4`);
282	if indent < `4` && self.ix < self.bytes.len() {
283	let c = self.bytes[self.ix];
284	if c == b'-' \|\| c == b'+' \|\| c == b'*' {
285	if self.ix >= self.min_hrule_offset {
286	// there could be an hrule here
287	if let Err(min_offset) = scan_hrule(&self.bytes[self.ix..]) {
288	self.min_hrule_offset = min_offset;
289	} else {
290	*self = save;
291	return None;
292	}
293	}
294	self.ix += `1`;
295	if self.scan_space(`1`) \|\| self.is_at_eol() {
296	return self.finish_list_marker(c, `0`, indent + `2`);
297	}
298	} else if c.is_ascii_digit() {
299	let start_ix = self.ix;
300	let mut ix = self.ix + `1`;
301	let mut val = u64::from(c - b'0');
302	while ix < self.bytes.len() && ix - start_ix < `10` {
303	let c = self.bytes[ix];
304	ix += `1`;
305	if c.is_ascii_digit() {
306	val = val * `10` + u64::from(c - b'0');
307	} else if c == b')' \|\| c == b'.' {
308	self.ix = ix;
309	if self.scan_space(`1`) \|\| self.is_at_eol() {
310	return self.finish_list_marker(c, val, indent + `1` + ix - start_ix);
311	} else {
312	break;
313	}
314	} else {
315	break;
316	}
317	}
318	}
319	}
320	*self = save;
321	None
322	}
323
324	fn finish_list_marker(
325	&mut self,
326	c: u8,
327	start: u64,
328	mut indent: usize,
329	) -> Option<(u8, u64, usize)> {
330	let save = self.clone();
331
332	// skip the rest of the line if it's blank
333	if scan_blank_line(&self.bytes[self.ix..]).is_some() {
334	return Some((c, start, indent));
335	}
336
337	let post_indent = self.scan_space_upto(`4`);
338	if post_indent < `4` {
339	indent += post_indent;
340	} else {
341	*self = save;
342	}
343	Some((c, start, indent))
344	}
345
346	/// Returns Some(is_checked) when a task list marker was found. Resets itself
347	/// to original state otherwise.
348	pub(crate) fn scan_task_list_marker(&mut self) -> Option<bool> {
349	let save = self.clone();
350	self.scan_space_upto(`3`);
351
352	if !self.scan_ch(b'[') {
353	*self = save;
354	return None;
355	}
356	let is_checked = match self.bytes.get(self.ix) {
357	Some(&c) if is_ascii_whitespace_no_nl(c) => {
358	self.ix += `1`;
359	`false`
360	}
361	Some(b'x') \| Some(b'X') => {
362	self.ix += `1`;
363	`true`
364	}
365	_ => {
366	*self = save;
367	return None;
368	}
369	};
370	if !self.scan_ch(b']') {
371	*self = save;
372	return None;
373	}
374	if !self
375	.bytes
376	.get(self.ix)
377	.map(\|&b\| is_ascii_whitespace_no_nl(b))
378	.unwrap_or(`false`)
379	{
380	*self = save;
381	return None;
382	}
383	Some(is_checked)
384	}
385
386	pub(crate) fn bytes_scanned(&self) -> usize {
387	self.ix
388	}
389
390	pub(crate) fn remaining_space(&self) -> usize {
391	self.spaces_remaining
392	}
393	}
394
/// True for the bytes 0x09 through 0x0d (tab, line feed, vertical tab, form
/// feed, carriage return) and for the space character.
pub(crate) fn is_ascii_whitespace(c: u8) -> bool {
    matches!(c, 0x09..=0x0d | b' ')
}
398
/// True for tab, vertical tab (0x0b), form feed (0x0c) and space; line feed
/// and carriage return are deliberately excluded.
pub(crate) fn is_ascii_whitespace_no_nl(c: u8) -> bool {
    matches!(c, b'\t' | 0x0b | 0x0c | b' ')
}
402
/// True for the ASCII letters `a`-`z` and `A`-`Z`.
fn is_ascii_alpha(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z')
}
406
/// True for ASCII digits and ASCII letters of either case.
fn is_ascii_alphanumeric(c: u8) -> bool {
    c.is_ascii_alphanumeric()
}
410
/// True for `-` and for any ASCII letter or digit.
fn is_ascii_letterdigitdash(c: u8) -> bool {
    match c {
        b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
414
/// True for the ASCII digits `0`-`9`.
fn is_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'9')
}
418
/// Reports whether `c` may appear inside an unquoted HTML attribute value.
///
/// CommonMark defines an unquoted attribute value as a nonempty string that
/// contains no spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or
/// backticks. Tab was previously missing from the rejected set, so a tab was
/// swallowed into the value instead of ending it.
fn is_valid_unquoted_attr_value_char(c: u8) -> bool {
    !matches!(
        c,
        b'\'' | b'"' | b' ' | b'\t' | b'=' | b'>' | b'<' | b'`' | b'\n' | b'\r'
    )
}
425
// Scan a single character: returns 1 when `data` starts with `c`, else 0
// (including when `data` is empty).
pub(crate) fn scan_ch(data: &[u8], c: u8) -> usize {
    match data.first() {
        Some(&first) if first == c => 1,
        _ => 0,
    }
}
434
/// Returns the length of the longest prefix of `data` whose bytes all
/// satisfy `f`. `f` is called on bytes in order and not past the first
/// byte it rejects.
pub(crate) fn scan_while<F>(data: &[u8], mut f: F) -> usize
where
    F: FnMut(u8) -> bool,
{
    data.iter()
        .position(|&byte| !f(byte))
        .unwrap_or(data.len())
}
441
/// Returns the length of the longest suffix of `data` whose bytes all
/// satisfy `f`, testing bytes from the end towards the start.
pub(crate) fn scan_rev_while<F>(data: &[u8], mut f: F) -> usize
where
    F: FnMut(u8) -> bool,
{
    data.iter()
        .rev()
        .position(|&byte| !f(byte))
        .unwrap_or(data.len())
}
448
/// Counts how many times the byte `c` is repeated at the start of `data`.
pub(crate) fn scan_ch_repeat(data: &[u8], c: u8) -> usize {
    data.iter().take_while(|&&byte| byte == c).count()
}
452
453	// Note: this scans ASCII whitespace only, for Unicode whitespace use
454	// a different function.
455	pub(crate) fn scan_whitespace_no_nl(data: &[u8]) -> usize {
456	scan_while(data, f:is_ascii_whitespace_no_nl)
457	}
458
459	fn scan_attr_value_chars(data: &[u8]) -> usize {
460	scan_while(data, f:is_valid_unquoted_attr_value_char)
461	}
462
/// Scans a line ending at the start of `bytes`.
///
/// Returns `Some(0)` for empty input, `Some(1)` for `\n` or a lone `\r`,
/// `Some(2)` for `\r\n`, and `None` when `bytes` starts with anything else.
pub(crate) fn scan_eol(bytes: &[u8]) -> Option<usize> {
    match bytes {
        [] => Some(0),
        [b'\n', ..] => Some(1),
        [b'\r', b'\n', ..] => Some(2),
        [b'\r', ..] => Some(1),
        _ => None,
    }
}
473
474	pub(crate) fn scan_blank_line(bytes: &[u8]) -> Option<usize> {
475	let i: usize = scan_whitespace_no_nl(data:bytes);
476	scan_eol(&bytes[i..]).map(\|n: usize\| i + n)
477	}
478
479	pub(crate) fn scan_nextline(bytes: &[u8]) -> usize {
480	memchr(b'`\n`', bytes).map_or(default:bytes.len(), \|x: usize\| x + `1`)
481	}
482
483	// return: end byte for closing code fence, or None
484	// if the line is not a closing code fence
485	pub(crate) fn scan_closing_code_fence(
486	bytes: &[u8],
487	fence_char: u8,
488	n_fence_char: usize,
489	) -> Option<usize> {
490	if bytes.is_empty() {
491	return Some(`0`);
492	}
493	let mut i: usize = `0`;
494	let num_fence_chars_found: usize = scan_ch_repeat(&bytes[i..], c:fence_char);
495	if num_fence_chars_found < n_fence_char {
496	return None;
497	}
498	i += num_fence_chars_found;
499	let num_trailing_spaces: usize = scan_ch_repeat(&bytes[i..], c:b' ');
500	i += num_trailing_spaces;
501	scan_eol(&bytes[i..]).map(\|_\| i)
502	}
503
504	// return: end byte for closing metadata block, or None
505	// if the line is not a closing metadata block
506	pub(crate) fn scan_closing_metadata_block(bytes: &[u8], fence_char: u8) -> Option<usize> {
507	let mut i: usize = `0`;
508	let mut num_fence_chars_found: usize = scan_ch_repeat(&bytes[i..], c:fence_char);
509	if num_fence_chars_found != `3` {
510	// if YAML style metadata block the closing character can also be `.`
511	if fence_char == b'-' {
512	num_fence_chars_found = scan_ch_repeat(&bytes[i..], c:b'.');
513	if num_fence_chars_found != `3` {
514	return None;
515	}
516	} else {
517	return None;
518	}
519	}
520	i += num_fence_chars_found;
521	let num_trailing_spaces: usize = scan_ch_repeat(&bytes[i..], c:b' ');
522	i += num_trailing_spaces;
523	scan_eol(&bytes[i..]).map(\|_\| i)
524	}
525
// Measures leading indentation in `text`, treating a tab as advancing to the
// next multiple of four columns.
//
// returned pair is (number of bytes, number of spaces). The byte figure is
// the index of the last byte that was examined: the first non-blank byte,
// the space that made the count reach `max`, a tab that would overshoot
// `max`, or the final byte when the input runs out.
pub(crate) fn calc_indent(text: &[u8], max: usize) -> (usize, usize) {
    let mut width = 0;
    let mut last_seen = 0;

    for (pos, byte) in text.iter().copied().enumerate() {
        last_seen = pos;
        if byte == b' ' {
            width += 1;
            if width == max {
                break;
            }
        } else if byte == b'\t' {
            let next_stop = width + 4 - (width & 3);
            if next_stop > max {
                break;
            }
            width = next_stop;
        } else {
            break;
        }
    }

    (last_seen, width)
}
553
554	/// Scan hrule opening sequence.
555	///
556	/// Returns Ok(x) when it finds an hrule, where x is the
557	/// size of line containing the hrule, including the trailing newline.
558	///
559	/// Returns Err(x) when it does not find an hrule and x is
560	/// the offset in data before no hrule can appear.
561	pub(crate) fn scan_hrule(bytes: &[u8]) -> Result<usize, usize> {
562	if bytes.len() < `3` {
563	return Err(`0`);
564	}
565	let c = bytes[`0`];
566	if !(c == b'*' \|\| c == b'-' \|\| c == b'_') {
567	return Err(`0`);
568	}
569	let mut n = `0`;
570	let mut i = `0`;
571
572	while i < bytes.len() {
573	match bytes[i] {
574	b'`\n`' \| b'`\r`' => {
575	i += scan_eol(&bytes[i..]).unwrap_or(`0`);
576	break;
577	}
578	c2 if c2 == c => {
579	n += `1`;
580	}
581	b' ' \| b'`\t`' => (),
582	_ => return Err(i),
583	}
584	i += `1`;
585	}
586	if n >= `3` {
587	Ok(i)
588	} else {
589	Err(i)
590	}
591	}
592
593	/// Scan an ATX heading opening sequence.
594	///
595	/// Returns number of bytes in prefix and level.
596	pub(crate) fn scan_atx_heading(data: &[u8]) -> Option<HeadingLevel> {
597	let level: usize = scan_ch_repeat(data, c:b'#');
598	if data.get(level).copied().map_or(default:`true`, f:is_ascii_whitespace) {
599	HeadingLevel::try_from(level).ok()
600	} else {
601	None
602	}
603	}
604
605	/// Scan a setext heading underline.
606	///
607	/// Returns number of bytes in line (including trailing newline) and level.
608	pub(crate) fn scan_setext_heading(data: &[u8]) -> Option<(usize, HeadingLevel)> {
609	let c: u8 = *data.first()?;
610	let level: HeadingLevel = if c == b'=' {
611	HeadingLevel::H1
612	} else if c == b'-' {
613	HeadingLevel::H2
614	} else {
615	return None;
616	};
617	let mut i: usize = `1` + scan_ch_repeat(&data[`1`..], c);
618	i += scan_blank_line(&data[i..])?;
619	Some((i, level))
620	}
621
622	// returns number of bytes in line (including trailing
623	// newline) and column alignments
624	pub(crate) fn scan_table_head(data: &[u8]) -> (usize, Vec<Alignment>) {
625	let (mut i, spaces) = calc_indent(data, `4`);
626	if spaces > `3` \|\| i == data.len() {
627	return (`0`, vec![]);
628	}
629	let mut cols = vec![];
630	let mut active_col = Alignment::None;
631	let mut start_col = `true`;
632	let mut found_pipe = `false`;
633	let mut found_hyphen = `false`;
634	let mut found_hyphen_in_col = `false`;
635	if data[i] == b'\|' {
636	i += `1`;
637	found_pipe = `true`;
638	}
639	for c in &data[i..] {
640	if let Some(n) = scan_eol(&data[i..]) {
641	i += n;
642	break;
643	}
644	match *c {
645	b' ' => (),
646	b':' => {
647	active_col = match (start_col, active_col) {
648	(`true`, Alignment::None) => Alignment::Left,
649	(`false`, Alignment::Left) => Alignment::Center,
650	(`false`, Alignment::None) => Alignment::Right,
651	_ => active_col,
652	};
653	start_col = `false`;
654	}
655	b'-' => {
656	start_col = `false`;
657	found_hyphen = `true`;
658	found_hyphen_in_col = `true`;
659	}
660	b'\|' => {
661	start_col = `true`;
662	found_pipe = `true`;
663	cols.push(active_col);
664	active_col = Alignment::None;
665	if !found_hyphen_in_col {
666	// It isn't a table head if it has back-to-back pipes.
667	return (`0`, vec![]);
668	}
669	found_hyphen_in_col = `false`;
670	}
671	_ => {
672	// It isn't a table head if it has characters outside the allowed set.
673	return (`0`, vec![]);
674	}
675	}
676	i += `1`;
677	}
678
679	if !start_col {
680	cols.push(active_col);
681	}
682	if !found_pipe \|\| !found_hyphen {
683	// It isn't a table head if it doesn't have a least one pipe or hyphen.
684	// It's a list, a header, or a thematic break.
685	return (`0`, vec![]);
686	}
687
688	(i, cols)
689	}
690
691	/// Scan code fence.
692	///
693	/// Returns number of bytes scanned and the char that is repeated to make the code fence.
694	pub(crate) fn scan_code_fence(data: &[u8]) -> Option<(usize, u8)> {
695	let c: u8 = *data.first()?;
696	if !(c == b'`' \|\| c == b'~') {
697	return None;
698	}
699	let i: usize = `1` + scan_ch_repeat(&data[`1`..], c);
700	if i >= `3` {
701	if c == b'`' {
702	let suffix: &[u8] = &data[i..];
703	let next_line: usize = i + scan_nextline(bytes:suffix);
704	// FIXME: make sure this is correct
705	if suffix[..(next_line - i)].iter().any(\|&b: u8\| b == b'`') {
706	return None;
707	}
708	}
709	Some((i, c))
710	} else {
711	None
712	}
713	}
714
715	/// Scan metadata block, returning the number of delimiter bytes
716	/// (always 3 for now) and the delimiter character.
717	///
718	/// Differently to code blocks, metadata blocks must be closed with the closing
719	/// sequence not being a valid terminator the end of the file.
720	///
721	/// In addition, they cannot be empty (closing sequence in the next line) and
722	/// the next line cannot be an empty line.
723	pub(crate) fn scan_metadata_block(
724	data: &[u8],
725	yaml_style_enabled: bool,
726	pluses_style_enabled: bool,
727	) -> Option<(usize, u8)> {
728	// Only if metadata blocks are enabled
729	if yaml_style_enabled \|\| pluses_style_enabled {
730	let c = *data.first()?;
731	if !((c == b'-' && yaml_style_enabled) \|\| (c == b'+' && pluses_style_enabled)) {
732	return None;
733	}
734	let i = `1` + scan_ch_repeat(&data[`1`..], c);
735	// Only trailing spaces after the delimiters in the line
736	let next_line = scan_nextline(&data[i..]);
737	for c in &data[i..i + next_line] {
738	if !c.is_ascii_whitespace() {
739	return None;
740	}
741	}
742	if i == `3` {
743	// Search the closing sequence
744	let mut j = i;
745	let mut first_line = `true`;
746	while j < data.len() {
747	j += scan_nextline(&data[j..]);
748	let closed = scan_closing_metadata_block(&data[j..], c).is_some();
749	// The first line of the metadata block cannot be an empty line
750	// nor the end of the block
751	if first_line {
752	if closed \|\| scan_blank_line(&data[j..]).is_some() {
753	return None;
754	}
755	first_line = `false`;
756	}
757	if closed {
758	return Some((i, c));
759	}
760	}
761	None
762	} else {
763	None
764	}
765	} else {
766	None
767	}
768	}
769
/// Scans a block quote marker: `>` optionally followed by a single space.
///
/// Returns the number of bytes in the marker (1 or 2), or `None` when `data`
/// does not start with `>`.
pub(crate) fn scan_blockquote_start(data: &[u8]) -> Option<usize> {
    match data {
        [b'>', b' ', ..] => Some(2),
        [b'>', ..] => Some(1),
        _ => None,
    }
}
782
783	/// return number of bytes scanned, delimiter, start index, and indent
784	pub(crate) fn scan_listitem(bytes: &[u8]) -> Option<(usize, u8, usize, usize)> {
785	let mut c = *bytes.first()?;
786	let (w, start) = match c {
787	b'-' \| b'+' \| b'*' => (`1`, `0`),
788	b'0'..=b'9' => {
789	let (length, start) = parse_decimal(bytes, `9`);
790	c = *bytes.get(length)?;
791	if !(c == b'.' \|\| c == b')') {
792	return None;
793	}
794	(length + `1`, start)
795	}
796	_ => {
797	return None;
798	}
799	};
800	// TODO: replace calc_indent with scan_leading_whitespace, for tab correctness
801	let (mut postn, mut postindent) = calc_indent(&bytes[w..], `5`);
802	if postindent == `0` {
803	scan_eol(&bytes[w..])?;
804	postindent += `1`;
805	} else if postindent > `4` {
806	postn = `1`;
807	postindent = `1`;
808	}
809	if scan_blank_line(&bytes[w..]).is_some() {
810	postn = `0`;
811	postindent = `1`;
812	}
813	Some((w + postn, c, start, w + postindent))
814	}
815
// Parses the run of ASCII decimal digits at the start of `bytes`, looking at
// no more than `limit` bytes and stopping early if the value would overflow
// `usize`.
//
// returns (number of bytes, parsed decimal)
fn parse_decimal(bytes: &[u8], limit: usize) -> (usize, usize) {
    let mut count = 0;
    let mut value = 0usize;
    for &byte in bytes.iter().take(limit) {
        if !byte.is_ascii_digit() {
            break;
        }
        let digit = usize::from(byte - b'0');
        match value.checked_mul(10).and_then(|v| v.checked_add(digit)) {
            Some(next) => {
                value = next;
                count += 1;
            }
            // stop early on overflow
            None => break,
        }
    }
    (count, value)
}
836
// Parses the run of hexadecimal digits (either case) at the start of
// `bytes`, looking at no more than `limit` bytes and stopping early if the
// value would overflow `usize`.
//
// returns (number of bytes, parsed hex)
fn parse_hex(bytes: &[u8], limit: usize) -> (usize, usize) {
    let mut count = 0;
    let mut value = 0usize;
    for &byte in bytes.iter().take(limit) {
        let digit = match byte {
            b'0'..=b'9' => usize::from(byte - b'0'),
            b'a'..=b'f' => usize::from(byte - b'a' + 10),
            b'A'..=b'F' => usize::from(byte - b'A' + 10),
            _ => break,
        };
        match value.checked_mul(16).and_then(|v| v.checked_add(digit)) {
            Some(next) => {
                value = next;
                count += 1;
            }
            // stop early on overflow
            None => break,
        }
    }
    (count, value)
}
867
/// Converts a numeric code point into a `char`.
///
/// Returns `None` for zero, for values that do not fit in a `u32`, and for
/// values that are not valid Unicode scalar values.
fn char_from_codepoint(input: usize) -> Option<char> {
    let codepoint: Option<u32> = input.try_into().ok();
    codepoint
        .filter(|&value| value != 0)
        .and_then(char::from_u32)
}
875
876	// doesn't bother to check data[0] == '&'
877	pub(crate) fn scan_entity(bytes: &[u8]) -> (usize, Option<CowStr<'static>>) {
878	let mut end = `1`;
879	if scan_ch(&bytes[end..], b'#') == `1` {
880	end += `1`;
881	let (bytecount, codepoint) = if end < bytes.len() && bytes[end] \| `0x20` == b'x' {
882	end += `1`;
883	parse_hex(&bytes[end..], `6`)
884	} else {
885	parse_decimal(&bytes[end..], `7`)
886	};
887	end += bytecount;
888	return if bytecount == `0` \|\| scan_ch(&bytes[end..], b';') == `0` {
889	(`0`, None)
890	} else {
891	(
892	end + `1`,
893	Some(char_from_codepoint(codepoint).unwrap_or('`\u{FFFD}`').into()),
894	)
895	};
896	}
897	end += scan_while(&bytes[end..], is_ascii_alphanumeric);
898	if scan_ch(&bytes[end..], b';') == `1` {
899	if let Some(value) = entities::get_entity(&bytes[`1`..end]) {
900	return (end + `1`, Some(value.into()));
901	}
902	}
903	(`0`, None)
904	}
905
906	// note: dest returned is raw, still needs to be unescaped
907	// TODO: check that nested parens are really not allowed for refdefs
908	// TODO(performance): this func should probably its own unescaping
909	pub(crate) fn scan_link_dest(
910	data: &str,
911	start_ix: usize,
912	max_next: usize,
913	) -> Option<(usize, &str)> {
914	let bytes = &data.as_bytes()[start_ix..];
915	let mut i = scan_ch(bytes, b'<');
916
917	if i != `0` {
918	// pointy links
919	while i < bytes.len() {
920	match bytes[i] {
921	b'`\n`' \| b'`\r`' \| b'<' => return None,
922	b'>' => return Some((i + `1`, &data[(start_ix + `1`)..(start_ix + i)])),
923	b'`\\`' if i + `1` < bytes.len() && is_ascii_punctuation(bytes[i + `1`]) => {
924	i += `1`;
925	}
926	_ => {}
927	}
928	i += `1`;
929	}
930	None
931	} else {
932	// non-pointy links
933	let mut nest = `0`;
934	while i < bytes.len() {
935	match bytes[i] {
936	`0x0`..=`0x20` => {
937	break;
938	}
939	b'(' => {
940	if nest > max_next {
941	return None;
942	}
943	nest += `1`;
944	}
945	b')' => {
946	if nest == `0` {
947	break;
948	}
949	nest -= `1`;
950	}
951	b'`\\`' if i + `1` < bytes.len() && is_ascii_punctuation(bytes[i + `1`]) => {
952	i += `1`;
953	}
954	_ => {}
955	}
956	i += `1`;
957	}
958	if nest != `0` {
959	return None;
960	}
961	Some((i, &data[start_ix..(start_ix + i)]))
962	}
963	}
964
965	/// Returns bytes scanned
966	fn scan_attribute_name(data: &[u8]) -> Option<usize> {
967	let (&c: u8, tail: &[u8]) = data.split_first()?;
968	if is_ascii_alpha(c) \|\| c == b'_' \|\| c == b':' {
969	Some(
970	`1` + scan_while(data:tail, \|c: u8\| {
971	is_ascii_alphanumeric(c) \|\| c == b'_' \|\| c == b'.' \|\| c == b':' \|\| c == b'-'
972	}),
973	)
974	} else {
975	None
976	}
977	}
978
979	/// Returns the index immediately following the attribute on success.
980	/// The argument `buffer_ix` refers to the index into `data` from which we
981	/// should copy into `buffer` when we find bytes to skip.
982	fn scan_attribute(
983	data: &[u8],
984	mut ix: usize,
985	newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
986	buffer: &mut Vec<u8>,
987	buffer_ix: &mut usize,
988	) -> Option<usize> {
989	ix += scan_attribute_name(&data[ix..])?;
990	let ix_after_attribute: usize = ix;
991	ix = scan_whitespace_with_newline_handler_without_buffer(data, i:ix, newline_handler)?;
992	if scan_ch(&data[ix..], c:b'=') == `1` {
993	ix = scan_whitespace_with_newline_handler(data, i:ix_after_attribute, newline_handler, buffer, buffer_ix)?;
994	ix += `1`;
995	ix = scan_whitespace_with_newline_handler(data, i:ix, newline_handler, buffer, buffer_ix)?;
996	ix = scan_attribute_value(data, i:ix, newline_handler, buffer, buffer_ix)?;
997	Some(ix)
998	} else {
999	// Leave whitespace for next attribute.
1000	Some(ix_after_attribute)
1001	}
1002	}
1003
1004	/// Scans whitespace and possibly newlines according to the
1005	/// behavior defined by the newline handler. When bytes are skipped,
1006	/// all preceding non-skipped bytes are pushed to the buffer.
1007	fn scan_whitespace_with_newline_handler(
1008	data: &[u8],
1009	mut i: usize,
1010	newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1011	buffer: &mut Vec<u8>,
1012	buffer_ix: &mut usize,
1013	) -> Option<usize> {
1014	while i < data.len() {
1015	if !is_ascii_whitespace(data[i]) {
1016	return Some(i);
1017	}
1018	if let Some(eol_bytes: usize) = scan_eol(&data[i..]) {
1019	let handler: &dyn Fn(&[u8]) -> usize = newline_handler?;
1020	i += eol_bytes;
1021	let skipped_bytes: usize = handler(&data[i..]);
1022
1023	if skipped_bytes > `0` {
1024	buffer.extend(&data[*buffer_ix..i]);
1025	*buffer_ix = i + skipped_bytes;
1026	}
1027
1028	i += skipped_bytes;
1029	} else {
1030	i += `1`;
1031	}
1032	}
1033
1034	Some(i)
1035	}
1036
1037	/// Scans whitespace and possible newlines according to the behavior defined
1038	/// by the newline handler.
1039	///
1040	/// Unlike [`scan_whitespace_with_newline_handler`], this function doesn't
1041	/// copy skipped data into a buffer. Typically, if this function
1042	/// returns `Some`, a call to `scan_whitespace_with_newline_handler` will
1043	/// soon follow.
1044	fn scan_whitespace_with_newline_handler_without_buffer(
1045	data: &[u8],
1046	mut i: usize,
1047	newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1048	) -> Option<usize> {
1049	while i < data.len() {
1050	if !is_ascii_whitespace(data[i]) {
1051	return Some(i);
1052	}
1053	if let Some(eol_bytes: usize) = scan_eol(&data[i..]) {
1054	let handler: &dyn Fn(&[u8]) -> usize = newline_handler?;
1055	i += eol_bytes;
1056	let skipped_bytes: usize = handler(&data[i..]);
1057	i += skipped_bytes;
1058	} else {
1059	i += `1`;
1060	}
1061	}
1062
1063	Some(i)
1064	}
1065
1066	/// Returns the index immediately following the attribute value on success.
1067	fn scan_attribute_value(
1068	data: &[u8],
1069	mut i: usize,
1070	newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1071	buffer: &mut Vec<u8>,
1072	buffer_ix: &mut usize,
1073	) -> Option<usize> {
1074	match *data.get(i)? {
1075	b @ b'"' \| b @ b'`\'`' => {
1076	i += `1`;
1077	while i < data.len() {
1078	if data[i] == b {
1079	return Some(i + `1`);
1080	}
1081	if let Some(eol_bytes) = scan_eol(&data[i..]) {
1082	let handler = newline_handler?;
1083	i += eol_bytes;
1084	let skipped_bytes = handler(&data[i..]);
1085
1086	if skipped_bytes > `0` {
1087	buffer.extend(&data[*buffer_ix..i]);
1088	*buffer_ix = i + skipped_bytes;
1089	}
1090	i += skipped_bytes;
1091	} else {
1092	i += `1`;
1093	}
1094	}
1095	return None;
1096	}
1097	b' ' \| b'=' \| b'>' \| b'<' \| b'`' \| b'`\n`' \| b'`\r`' => {
1098	return None;
1099	}
1100	_ => {
1101	// unquoted attribute value
1102	i += scan_attr_value_chars(&data[i..]);
1103	}
1104	}
1105
1106	Some(i)
1107	}
1108
1109	// Remove backslash escapes and resolve entities
1110	pub(crate) fn unescape<'a, I: Into<CowStr<'a>>>(input: I, is_in_table: bool) -> CowStr<'a> {
1111	let input = input.into();
1112	let mut result = String::new();
1113	let mut mark = `0`;
1114	let mut i = `0`;
1115	let bytes = input.as_bytes();
1116	while i < bytes.len() {
1117	match bytes[i] {
1118	// Tables are special, because they're parsed as-if the tables
1119	// were parsed in a discrete pass, changing `\\|` to `\|`, and then
1120	// passing the changed string to the inline parser.
1121	b'`\\`'
1122	if is_in_table
1123	&& i + `2` < bytes.len()
1124	&& bytes[i + `1`] == b'`\\`'
1125	&& bytes[i + `2`] == b'\|' =>
1126	{
1127	// even number of `\`s before pipe
1128	// odd number is handled in the normal way below
1129	result.push_str(&input[mark..i]);
1130	mark = i + `2`;
1131	i += `3`;
1132	}
1133	b'`\\`' if i + `1` < bytes.len() && is_ascii_punctuation(bytes[i + `1`]) => {
1134	result.push_str(&input[mark..i]);
1135	mark = i + `1`;
1136	i += `2`;
1137	}
1138	b'&' => match scan_entity(&bytes[i..]) {
1139	(n, Some(value)) => {
1140	result.push_str(&input[mark..i]);
1141	result.push_str(&value);
1142	i += n;
1143	mark = i;
1144	}
1145	_ => i += `1`,
1146	},
1147	b'`\r`' => {
1148	result.push_str(&input[mark..i]);
1149	i += `1`;
1150	mark = i;
1151	}
1152	_ => i += `1`,
1153	}
1154	}
1155	if mark == `0` {
1156	input
1157	} else {
1158	result.push_str(&input[mark..]);
1159	result.into()
1160	}
1161	}
1162
1163	/// Assumes `data` is preceded by `<`.
1164	pub(crate) fn starts_html_block_type_6(data: &[u8]) -> bool {
1165	let i: usize = scan_ch(data, c:b'/');
1166	let tail: &[u8] = &data[i..];
1167	let n: usize = scan_while(data:tail, f:is_ascii_alphanumeric);
1168	if !is_html_tag(&tail[..n]) {
1169	return `false`;
1170	}
1171	// Starting condition says the next byte must be either a space, a tab,
1172	// the end of the line, the string >, or the string />
1173	let tail: &[u8] = &tail[n..];
1174	tail.is_empty()
1175	\|\| tail[`0`] == b' '
1176	\|\| tail[`0`] == b'`\t`'
1177	\|\| tail[`0`] == b'`\r`'
1178	\|\| tail[`0`] == b'`\n`'
1179	\|\| tail[`0`] == b'>'
1180	\|\| tail.len() >= `2` && &tail[..`2`] == b"/>"
1181	}
1182
1183	fn is_html_tag(tag: &[u8]) -> bool {
1184	HTML_TAGS
1185	.binary_search_by(\|probe: &&str\| {
1186	let probe_bytes_iter: Iter<'_, u8> = probe.as_bytes().iter();
1187	let tag_bytes_iter: Iter<'_, u8> = tag.iter();
1188
1189	probe_bytes_iterOption
1190	.zip(tag_bytes_iter)
1191	.find_map(\|(&a: u8, &b: u8)\| {
1192	// We can compare case insensitively because the probes are
1193	// all lower case alpha strings.
1194	match a.cmp(&(b \| `0x20`)) {
1195	std::cmp::Ordering::Equal => None,
1196	inequality: Ordering => Some(inequality),
1197	}
1198	})
1199	.unwrap_or_else(\|\| probe.len().cmp(&tag.len()))
1200	})
1201	.is_ok()
1202	}
1203
1204	/// Assumes that `data` starts with `<`.
1205	/// Returns the index into data directly after the html tag on success.
1206	pub(crate) fn scan_html_type_7(data: &[u8]) -> Option<usize> {
1207	// Block type html does not allow for newlines, so we
1208	// do not pass a newline handler.
1209	let (_span: Vec, i: usize) = scan_html_block_inner(data, newline_handler:None)?;
1210	scan_blank_line(&data[i..])?;
1211	Some(i)
1212	}
1213
1214	/// Assumes that `data` starts with `<`.
1215	/// Returns the number of bytes scanned and the html in case of
1216	/// success.
1217	/// When some bytes were skipped, because the html was split over
1218	/// multiple leafs (e.g. over multiple lines in a blockquote),
1219	/// the html is returned as a vector of bytes.
1220	/// If no bytes were skipped, the buffer will be empty.
1221	pub(crate) fn scan_html_block_inner(
1222	data: &[u8],
1223	newline_handler: Option<&dyn Fn(&[u8]) -> usize>,
1224	) -> Option<(Vec<u8>, usize)> {
1225	let mut buffer = Vec::new();
1226	let mut last_buf_index = `0`;
1227
1228	let close_tag_bytes = scan_ch(&data[`1`..], b'/');
1229	let l = scan_while(&data[(`1` + close_tag_bytes)..], is_ascii_alpha);
1230	if l == `0` {
1231	return None;
1232	}
1233	let mut i = `1` + close_tag_bytes + l;
1234	i += scan_while(&data[i..], is_ascii_letterdigitdash);
1235
1236	if close_tag_bytes == `0` {
1237	loop {
1238	let old_i = i;
1239	loop {
1240	i += scan_whitespace_no_nl(&data[i..]);
1241	if let Some(eol_bytes) = scan_eol(&data[i..]) {
1242	if eol_bytes == `0` {
1243	return None;
1244	}
1245	let handler = newline_handler?;
1246	i += eol_bytes;
1247	let skipped_bytes = handler(&data[i..]);
1248
1249	let data_len = data.len() - i;
1250
1251	debug_assert!(
1252	skipped_bytes <= data_len,
1253	"Handler tried to skip too many bytes, fed {}, skipped {}",
1254	data_len,
1255	skipped_bytes
1256	);
1257
1258	if skipped_bytes > `0` {
1259	buffer.extend(&data[last_buf_index..i]);
1260	i += skipped_bytes;
1261	last_buf_index = i;
1262	}
1263	} else {
1264	break;
1265	}
1266	}
1267	if let Some(b'/') \| Some(b'>') = data.get(i) {
1268	break;
1269	}
1270	if old_i == i {
1271	// No whitespace, which is mandatory.
1272	return None;
1273	}
1274	i = scan_attribute(data, i, newline_handler, &mut buffer, &mut last_buf_index)?;
1275	}
1276	}
1277
1278	i += scan_whitespace_no_nl(&data[i..]);
1279
1280	if close_tag_bytes == `0` {
1281	i += scan_ch(&data[i..], b'/');
1282	}
1283
1284	if scan_ch(&data[i..], b'>') == `0` {
1285	None
1286	} else {
1287	i += `1`;
1288	if !buffer.is_empty() {
1289	buffer.extend(&data[last_buf_index..i]);
1290	}
1291	Some((buffer, i))
1292	}
1293	}
1294
1295	/// Returns (next_byte_offset, uri, type)
1296	pub(crate) fn scan_autolink(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>, LinkType)> {
1297	scan_uriOption<(usize, CowStr<'_>, …)>(text, start_ix)
1298	.map(\|(bytes: usize, uri: CowStr<'_>)\| (bytes, uri, LinkType::Autolink))
1299	.or_else(\|\| scan_email(text, start_ix).map(\|(bytes: usize, uri: CowStr<'_>)\| (bytes, uri, LinkType::Email)))
1300	}
1301
1302	/// Returns (next_byte_offset, uri)
1303	fn scan_uri(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
1304	let bytes = &text.as_bytes()[start_ix..];
1305
1306	// scheme's first byte must be an ascii letter
1307	if bytes.is_empty() \|\| !is_ascii_alpha(bytes[`0`]) {
1308	return None;
1309	}
1310
1311	let mut i = `1`;
1312
1313	while i < bytes.len() {
1314	let c = bytes[i];
1315	i += `1`;
1316	match c {
1317	c if is_ascii_alphanumeric(c) => (),
1318	b'.' \| b'-' \| b'+' => (),
1319	b':' => break,
1320	_ => return None,
1321	}
1322	}
1323
1324	// scheme length must be between 2 and 32 characters long. scheme
1325	// must be followed by colon
1326	if !(`3`..=`33`).contains(&i) {
1327	return None;
1328	}
1329
1330	while i < bytes.len() {
1331	match bytes[i] {
1332	b'>' => return Some((start_ix + i + `1`, text[start_ix..(start_ix + i)].into())),
1333	b'`\0`'..=b' ' \| b'<' => return None,
1334	_ => (),
1335	}
1336	i += `1`;
1337	}
1338
1339	None
1340	}
1341
1342	/// Returns (next_byte_offset, email)
1343	fn scan_email(text: &str, start_ix: usize) -> Option<(usize, CowStr<'_>)> {
1344	// using a regex library would be convenient, but doing it by hand is not too bad
1345	let bytes = &text.as_bytes()[start_ix..];
1346	let mut i = `0`;
1347
1348	while i < bytes.len() {
1349	let c = bytes[i];
1350	i += `1`;
1351	match c {
1352	c if is_ascii_alphanumeric(c) => (),
1353	b'.' \| b'!' \| b'#' \| b'$' \| b'%' \| b'&' \| b'`\'`' \| b'*' \| b'+' \| b'/' \| b'=' \| b'?'
1354	\| b'^' \| b'_' \| b'`' \| b'{' \| b'\|' \| b'}' \| b'~' \| b'-' => (),
1355	b'@' if i > `1` => break,
1356	_ => return None,
1357	}
1358	}
1359
1360	loop {
1361	let label_start_ix = i;
1362	let mut fresh_label = `true`;
1363
1364	while i < bytes.len() {
1365	match bytes[i] {
1366	c if is_ascii_alphanumeric(c) => (),
1367	b'-' if fresh_label => {
1368	return None;
1369	}
1370	b'-' => (),
1371	_ => break,
1372	}
1373	fresh_label = `false`;
1374	i += `1`;
1375	}
1376
1377	if i == label_start_ix \|\| i - label_start_ix > `63` \|\| bytes[i - `1`] == b'-' {
1378	return None;
1379	}
1380
1381	if scan_ch(&bytes[i..], b'.') == `0` {
1382	break;
1383	}
1384	i += `1`;
1385	}
1386
1387	if scan_ch(&bytes[i..], b'>') == `0` {
1388	return None;
1389	}
1390
1391	Some((start_ix + i + `1`, text[start_ix..(start_ix + i)].into()))
1392	}
1393
1394	/// Scan comment, declaration, or CDATA section, with initial "<!" already consumed.
1395	/// Returns byte offset on match.
1396	pub(crate) fn scan_inline_html_comment(
1397	bytes: &[u8],
1398	mut ix: usize,
1399	scan_guard: &mut HtmlScanGuard,
1400	) -> Option<usize> {
1401	let c = *bytes.get(ix)?;
1402	ix += `1`;
1403	match c {
1404	// An HTML comment consists of `<!-->`, `<!--->`, or `<!--`, a string of characters not
1405	// including the string `-->`, and `-->`.
1406	b'-' if ix > scan_guard.comment => {
1407	// HTML comment needs two hyphens after the !.
1408	if *bytes.get(ix)? != b'-' {
1409	return None;
1410	}
1411	// Yes, we're intentionally going backwards.
1412	// We want the cursor to point here:
1413	//
1414	// <!--
1415	// ^
1416	//
1417	// This way, the `<!-->` case is covered by the loop below.
1418	ix -= `1`;
1419
1420	while let Some(x) = memchr(b'-', &bytes[ix..]) {
1421	ix += x + `1`;
1422	scan_guard.comment = ix;
1423	if scan_ch(&bytes[ix..], b'-') == `1` && scan_ch(&bytes[ix + `1`..], b'>') == `1` {
1424	return Some(ix + `2`);
1425	}
1426	}
1427	None
1428	}
1429	// A CDATA section consists of the string `<![CDATA[`, a string of characters not
1430	// including the string `]]>`, and the string `]]>`.
1431	b'[' if bytes[ix..].starts_with(b"CDATA[") && ix > scan_guard.cdata => {
1432	ix += b"CDATA[".len();
1433	ix = memchr(b']', &bytes[ix..]).map_or(bytes.len(), \|x\| ix + x);
1434	let close_brackets = scan_ch_repeat(&bytes[ix..], b']');
1435	ix += close_brackets;
1436
1437	if close_brackets == `0` \|\| scan_ch(&bytes[ix..], b'>') == `0` {
1438	scan_guard.cdata = ix;
1439	None
1440	} else {
1441	Some(ix + `1`)
1442	}
1443	}
1444	// A declaration consists of the string `<!`, an ASCII letter, zero or more characters not
1445	// including the character >, and the character >.
1446	_ if c.is_ascii_alphabetic() && ix > scan_guard.declaration => {
1447	ix = memchr(b'>', &bytes[ix..]).map_or(bytes.len(), \|x\| ix + x);
1448	if scan_ch(&bytes[ix..], b'>') == `0` {
1449	scan_guard.declaration = ix;
1450	None
1451	} else {
1452	Some(ix + `1`)
1453	}
1454	}
1455	_ => None,
1456	}
1457	}
1458
1459	/// Scan processing directive, with initial "<?" already consumed.
1460	/// Returns the next byte offset on success.
1461	pub(crate) fn scan_inline_html_processing(
1462	bytes: &[u8],
1463	mut ix: usize,
1464	scan_guard: &mut HtmlScanGuard,
1465	) -> Option<usize> {
1466	if ix <= scan_guard.processing {
1467	return None;
1468	}
1469	while let Some(offset: usize) = memchr(needle:b'?', &bytes[ix..]) {
1470	ix += offset + `1`;
1471	if scan_ch(&bytes[ix..], c:b'>') == `1` {
1472	return Some(ix + `1`);
1473	}
1474	}
1475	scan_guard.processing = ix;
1476	None
1477	}
1478
#[cfg(test)]
mod test {
    use super::*;

    /// A list marker with a very long run of digits is not a list item.
    #[test]
    fn overflow_list() {
        assert!(
            scan_listitem(b"4444444444444444444444444444444444444444444444444444444444!").is_none()
        );
    }

    /// A shorter run of digits that still must be rejected.
    #[test]
    fn overflow_by_addition() {
        assert!(scan_listitem(b"1844674407370955161615!").is_none());
    }

    /// Addresses that `scan_email` has to accept. The scan starts at offset 1,
    /// just after the `<`.
    #[test]
    fn good_emails() {
        const EMAILS: &[&str] = &[
            "<a@b.c>",
            "<a@b>",
            "<a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-@example.com>",
            "<a@sixty-three-letters-in-this-identifier-----------------------63>",
        ];
        for email in EMAILS {
            assert!(
                scan_email(email, 1).is_some(),
                "expected {:?} to be accepted",
                email
            );
        }
    }

    /// Addresses that `scan_email` has to reject. The scan starts at offset 1,
    /// just after the `<`.
    #[test]
    fn bad_emails() {
        const EMAILS: &[&str] = &[
            "<@b.c>",
            "<foo@-example.com>",
            "<foo@example-.com>",
            "<a@notrailingperiod.>",
            "<a(noparens)@example.com>",
            "<\"noquotes\"@example.com>",
            "<a@sixty-four-letters-in-this-identifier-------------------------64>",
        ];
        for email in EMAILS {
            assert!(
                scan_email(email, 1).is_none(),
                "expected {:?} to be rejected",
                email
            );
        }
    }
}
1523