firstpass.rs source code [crates/pulldown_cmark/src/firstpass.rs]

1	//! The first pass resolves all block structure, generating an AST. Within a block, items
2	//! are in a linear chain with potential inline markup identified.
3
4	use std::cmp::max;
5	use std::ops::Range;
6
7	use crate::parse::{
8	scan_containers, Allocations, FootnoteDef, HeadingAttributes, Item, ItemBody, LinkDef,
9	LINK_MAX_NESTED_PARENS,
10	};
11	use crate::strings::CowStr;
12	use crate::tree::{Tree, TreeIndex};
13	use crate::Options;
14	use crate::{
15	linklabel::{scan_link_label_rest, LinkLabel},
16	HeadingLevel,
17	};
18	use crate::{scanners::*, MetadataBlockKind};
19
20	use unicase::UniCase;
21
22	/// Runs the first pass, which resolves the block structure of the document,
23	/// and returns the resulting tree.
24	pub(crate) fn run_first_pass(text: &str, options: Options) -> (Tree<Item>, Allocations<'_>) {
25	// This is a very naive heuristic for the number of nodes
26	// we'll need.
27	let start_capacity: usize = max(v1:`128`, v2:text.len() / `32`);
28	let lookup_table: &[bool; 256] = &create_lut(&options);
29	let first_pass: FirstPass<'_, '_> = FirstPass {
30	text,
31	tree: Tree::with_capacity(cap:start_capacity),
32	begin_list_item: None,
33	last_line_blank: `false`,
34	allocs: Allocations::new(),
35	options,
36	lookup_table,
37	next_paragraph_task: None,
38	brace_context_next: `0`,
39	brace_context_stack: Vec::new(),
40	};
41	first_pass.run()
42	}
43
44	// Each level of brace nesting adds another entry to a hash table.
45	// To limit the amount of memory consumed, do not create a new brace
46	// context beyond some amount deep.
47	//
48	// There are actually two limits at play here: this one,
49	// and the one where the maximum amount of distinct contexts passes
50	// the 255 item limit imposed by using `u8`. When over 255 distinct
51	// contexts are created, it wraps around, while this one instead makes it
52	// saturate, which is a better behavior.
53	const MATH_BRACE_CONTEXT_MAX_NESTING: usize = `25`;
54
55	/// State for the first parsing pass.
56	struct FirstPass<'a, 'b> {
57	text: &'a str,
58	tree: Tree<Item>,
59	begin_list_item: Option<usize>,
60	last_line_blank: bool,
61	allocs: Allocations<'a>,
62	options: Options,
63	lookup_table: &'b LookupTable,
64	/// This is `Some(item)` when the next paragraph
65	/// starts with a task list marker.
66	next_paragraph_task: Option<Item>,
67	/// Math environment brace nesting.
68	brace_context_stack: Vec<u8>,
69	brace_context_next: usize,
70	}
71
72	impl<'a, 'b> FirstPass<'a, 'b> {
73	fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
74	let mut ix = `0`;
75	while ix < self.text.len() {
76	ix = self.parse_block(ix);
77	}
78	for _ in `0`..self.tree.spine_len() {
79	self.pop(ix);
80	}
81	(self.tree, self.allocs)
82	}
83
84	/// Returns offset after block.
85	fn parse_block(&mut self, mut start_ix: usize) -> usize {
86	let bytes = self.text.as_bytes();
87	let mut line_start = LineStart::new(&bytes[start_ix..]);
88
89	// math spans and their braces are tracked only within a single block
90	self.brace_context_stack.clear();
91	self.brace_context_next = `0`;
92
93	let i = scan_containers(
94	&self.tree,
95	&mut line_start,
96	self.options.has_gfm_footnotes(),
97	);
98	for _ in i..self.tree.spine_len() {
99	self.pop(start_ix);
100	}
101
102	if self.options.contains(Options::ENABLE_OLD_FOOTNOTES) {
103	// finish footnote if it's still open and was preceded by blank line
104	if let Some(node_ix) = self.tree.peek_up() {
105	if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
106	if self.last_line_blank {
107	self.pop(start_ix);
108	}
109	}
110	}
111
112	// Footnote definitions of the form
113	// [^bar]:
114	// anything really*
115	let container_start = start_ix + line_start.bytes_scanned();
116	if let Some(bytecount) = self.parse_footnote(container_start) {
117	start_ix = container_start + bytecount;
118	start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(`0`);
119	line_start = LineStart::new(&bytes[start_ix..]);
120	}
121	}
122
123	// Process new containers
124	loop {
125	if self.options.has_gfm_footnotes()
126	\|\| self.options.contains(Options::ENABLE_OLD_FOOTNOTES)
127	{
128	// Footnote definitions of the form
129	// [^bar]:
130	// anything really*
131	let save = line_start.clone();
132	let indent = line_start.scan_space_upto(`4`);
133	if indent < `4` {
134	let container_start = start_ix + line_start.bytes_scanned();
135	if let Some(bytecount) = self.parse_footnote(container_start) {
136	start_ix = container_start + bytecount;
137	line_start = LineStart::new(&bytes[start_ix..]);
138	continue;
139	} else {
140	line_start = save;
141	}
142	} else {
143	line_start = save;
144	}
145	}
146	let container_start = start_ix + line_start.bytes_scanned();
147	if let Some((ch, index, indent)) = line_start.scan_list_marker() {
148	let after_marker_index = start_ix + line_start.bytes_scanned();
149	self.continue_list(container_start, ch, index);
150	self.tree.append(Item {
151	start: container_start,
152	end: after_marker_index, // will get updated later if item not empty
153	body: ItemBody::ListItem(indent),
154	});
155	self.tree.push();
156	if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
157	self.begin_list_item = Some(after_marker_index + n);
158	return after_marker_index + n;
159	}
160	if self.options.contains(Options::ENABLE_TASKLISTS) {
161	let task_list_marker =
162	line_start.scan_task_list_marker().map(\|is_checked\| Item {
163	start: after_marker_index,
164	end: start_ix + line_start.bytes_scanned(),
165	body: ItemBody::TaskListMarker(is_checked),
166	});
167	if let Some(task_list_marker) = task_list_marker {
168	if let Some(n) = scan_blank_line(&bytes[task_list_marker.end..]) {
169	self.tree.append(task_list_marker);
170	self.begin_list_item = Some(task_list_marker.end + n);
171	return task_list_marker.end + n;
172	} else {
173	self.next_paragraph_task = Some(task_list_marker);
174	}
175	}
176	}
177	} else if line_start.scan_blockquote_marker() {
178	let kind = if self.options.contains(Options::ENABLE_GFM) {
179	line_start.scan_blockquote_tag()
180	} else {
181	None
182	};
183	self.finish_list(start_ix);
184	self.tree.append(Item {
185	start: container_start,
186	end: `0`, // will get set later
187	body: ItemBody::BlockQuote(kind),
188	});
189	self.tree.push();
190	if kind.is_some() {
191	// blockquote tag leaves us at the end of the line
192	// we need to scan through all the container syntax for the next line
193	// and break out if we can't re-scan all of them
194	let ix = start_ix + line_start.bytes_scanned();
195	let mut lazy_line_start = LineStart::new(&bytes[ix..]);
196	let current_container = scan_containers(
197	&self.tree,
198	&mut lazy_line_start,
199	self.options.has_gfm_footnotes(),
200	) == self.tree.spine_len();
201	if !lazy_line_start.scan_space(`4`)
202	&& self.scan_paragraph_interrupt(
203	&bytes[ix + lazy_line_start.bytes_scanned()..],
204	current_container,
205	)
206	{
207	return ix;
208	} else {
209	// blockquote tags act as if they were nested in a paragraph
210	// so you can lazily continue the imaginary paragraph off of them
211	line_start = lazy_line_start;
212	line_start.scan_all_space();
213	start_ix = ix;
214	break;
215	}
216	}
217	} else {
218	break;
219	}
220	}
221
222	let ix = start_ix + line_start.bytes_scanned();
223
224	if let Some(n) = scan_blank_line(&bytes[ix..]) {
225	if let Some(node_ix) = self.tree.peek_up() {
226	match &mut self.tree[node_ix].item.body {
227	ItemBody::BlockQuote(..) => (),
228	ItemBody::ListItem(indent) if self.begin_list_item.is_some() => {
229	self.last_line_blank = `true`;
230	// This is a blank list item.
231	// While the list itself can be continued no matter how many blank lines
232	// there are between this one and the next one, it cannot nest anything
233	// else, so its indentation should not be subtracted from the line start.
234	*indent = `0`;
235	}
236	_ => {
237	self.last_line_blank = `true`;
238	}
239	}
240	}
241	return ix + n;
242	}
243
244	self.finish_list(start_ix);
245
246	// Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
247	let remaining_space = line_start.remaining_space();
248
249	let indent = line_start.scan_space_upto(`4`);
250	if indent == `4` {
251	let ix = start_ix + line_start.bytes_scanned();
252	let remaining_space = line_start.remaining_space();
253	return self.parse_indented_code_block(ix, remaining_space);
254	}
255
256	let ix = start_ix + line_start.bytes_scanned();
257
258	// metadata blocks cannot be indented
259	if indent == `0` {
260	if let Some((_n, metadata_block_ch)) = scan_metadata_block(
261	&bytes[ix..],
262	self.options
263	.contains(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS),
264	self.options
265	.contains(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS),
266	) {
267	return self.parse_metadata_block(ix, metadata_block_ch);
268	}
269	}
270
271	// HTML Blocks
272	if bytes[ix] == b'<' {
273	// Types 1-5 are all detected by one function and all end with the same
274	// pattern
275	if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + `1`)..]) {
276	return self.parse_html_block_type_1_to_5(
277	ix,
278	html_end_tag,
279	remaining_space,
280	indent,
281	);
282	}
283
284	// Detect type 6
285	if starts_html_block_type_6(&bytes[(ix + `1`)..]) {
286	return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
287	}
288
289	// Detect type 7
290	if let Some(_html_bytes) = scan_html_type_7(&bytes[ix..]) {
291	return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
292	}
293	}
294
295	if let Ok(n) = scan_hrule(&bytes[ix..]) {
296	return self.parse_hrule(n, ix);
297	}
298
299	if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
300	return self.parse_atx_heading(ix, atx_size);
301	}
302
303	if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
304	return self.parse_fenced_code_block(ix, indent, fence_ch, n);
305	}
306
307	// parse refdef
308	while let Some((bytecount, label, link_def)) =
309	self.parse_refdef_total(start_ix + line_start.bytes_scanned())
310	{
311	self.allocs.refdefs.0.entry(label).or_insert(link_def);
312	let container_start = start_ix + line_start.bytes_scanned();
313	let mut ix = container_start + bytecount;
314	// Refdefs act as if they were contained within a paragraph, for purposes of lazy
315	// continuations. For example:
316	//
317	// ```
318	// > [foo]: http://example.com
319	// bar
320	// ```
321	//
322	// is equivalent to
323	//
324	// ```
325	// > bar
326	//
327	// [foo]: http://example.com
328	// ```
329	if let Some(nl) = scan_blank_line(&bytes[ix..]) {
330	ix += nl;
331	let mut lazy_line_start = LineStart::new(&bytes[ix..]);
332	let current_container = scan_containers(
333	&self.tree,
334	&mut lazy_line_start,
335	self.options.has_gfm_footnotes(),
336	) == self.tree.spine_len();
337	if !lazy_line_start.scan_space(`4`)
338	&& self.scan_paragraph_interrupt(
339	&bytes[ix + lazy_line_start.bytes_scanned()..],
340	current_container,
341	)
342	{
343	return ix;
344	} else {
345	line_start = lazy_line_start;
346	line_start.scan_all_space();
347	start_ix = ix;
348	}
349	} else {
350	return ix;
351	}
352	}
353
354	let ix = start_ix + line_start.bytes_scanned();
355
356	self.parse_paragraph(ix)
357	}
358
359	/// Returns the offset of the first line after the table.
360	/// Assumptions: current focus is a table element and the table header
361	/// matches the separator line (same number of columns).
362	fn parse_table(
363	&mut self,
364	table_cols: usize,
365	head_start: usize,
366	body_start: usize,
367	) -> Option<usize> {
368	// filled empty cells are limited to protect against quadratic growth
369	// https://github.com/raphlinus/pulldown-cmark/issues/832
370	let mut missing_empty_cells = `0`;
371	// parse header. this shouldn't fail because we made sure the table header is ok
372	let (_sep_start, thead_ix) =
373	self.parse_table_row_inner(head_start, table_cols, &mut missing_empty_cells)?;
374	self.tree[thead_ix].item.body = ItemBody::TableHead;
375
376	// parse body
377	let mut ix = body_start;
378	while let Some((next_ix, _row_ix)) =
379	self.parse_table_row(ix, table_cols, &mut missing_empty_cells)
380	{
381	ix = next_ix;
382	}
383
384	self.pop(ix);
385	Some(ix)
386	}
387
388	/// Call this when containers are taken care of.
389	/// Returns bytes scanned, row_ix
390	fn parse_table_row_inner(
391	&mut self,
392	mut ix: usize,
393	row_cells: usize,
394	missing_empty_cells: &mut usize,
395	) -> Option<(usize, TreeIndex)> {
396	// Limit to prevent a malicious input from causing a denial of service.
397	const MAX_AUTOCOMPLETED_CELLS: usize = `1` << `18`; // = 0x40000
398
399	let bytes = self.text.as_bytes();
400	let mut cells = `0`;
401	let mut final_cell_ix = None;
402
403	let old_cur = self.tree.cur();
404	let row_ix = self.tree.append(Item {
405	start: ix,
406	end: `0`, // set at end of this function
407	body: ItemBody::TableRow,
408	});
409	self.tree.push();
410
411	loop {
412	ix += scan_ch(&bytes[ix..], b'\|');
413	let start_ix = ix;
414	ix += scan_whitespace_no_nl(&bytes[ix..]);
415
416	if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
417	ix += eol_bytes;
418	break;
419	}
420
421	let cell_ix = self.tree.append(Item {
422	start: start_ix,
423	end: ix,
424	body: ItemBody::TableCell,
425	});
426	self.tree.push();
427	let (next_ix, _brk) = self.parse_line(ix, None, TableParseMode::Active);
428
429	self.tree[cell_ix].item.end = next_ix;
430	self.tree.pop();
431
432	ix = next_ix;
433	cells += `1`;
434
435	if cells == row_cells {
436	final_cell_ix = Some(cell_ix);
437	}
438	}
439
440	if let (Some(cur), `0`) = (old_cur, cells) {
441	self.pop(ix);
442	self.tree[cur].next = None;
443	return None;
444	}
445
446	// fill empty cells if needed
447	// note: this is where GFM and commonmark-extra diverge. we follow
448	// GFM here
449	for _ in cells..row_cells {
450	if *missing_empty_cells >= MAX_AUTOCOMPLETED_CELLS {
451	return None;
452	}
453	*missing_empty_cells += `1`;
454	self.tree.append(Item {
455	start: ix,
456	end: ix,
457	body: ItemBody::TableCell,
458	});
459	}
460
461	// drop excess cells
462	if let Some(cell_ix) = final_cell_ix {
463	self.tree[cell_ix].next = None;
464	}
465
466	self.pop(ix);
467
468	Some((ix, row_ix))
469	}
470
471	/// Returns first offset after the row and the tree index of the row.
472	fn parse_table_row(
473	&mut self,
474	mut ix: usize,
475	row_cells: usize,
476	missing_empty_cells: &mut usize,
477	) -> Option<(usize, TreeIndex)> {
478	let bytes = self.text.as_bytes();
479	let mut line_start = LineStart::new(&bytes[ix..]);
480	let current_container = scan_containers(
481	&self.tree,
482	&mut line_start,
483	self.options.has_gfm_footnotes(),
484	) == self.tree.spine_len();
485	if !current_container {
486	return None;
487	}
488	line_start.scan_all_space();
489	ix += line_start.bytes_scanned();
490	if scan_paragraph_interrupt_no_table(
491	&bytes[ix..],
492	current_container,
493	self.options.contains(Options::ENABLE_FOOTNOTES),
494	&self.tree,
495	) {
496	return None;
497	}
498
499	let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells, missing_empty_cells)?;
500	Some((ix, row_ix))
501	}
502
503	/// Returns offset of line start after paragraph.
504	fn parse_paragraph(&mut self, start_ix: usize) -> usize {
505	let node_ix = self.tree.append(Item {
506	start: start_ix,
507	end: `0`, // will get set later
508	body: ItemBody::Paragraph,
509	});
510	self.tree.push();
511
512	if let Some(item) = self.next_paragraph_task {
513	self.tree.append(item);
514	self.next_paragraph_task = None;
515	}
516
517	let bytes = self.text.as_bytes();
518	let mut ix = start_ix;
519	loop {
520	let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
521	TableParseMode::Scan
522	} else {
523	TableParseMode::Disabled
524	};
525	let (next_ix, brk) = self.parse_line(ix, None, scan_mode);
526
527	// break out when we find a table
528	if let Some(Item {
529	body: ItemBody::Table(alignment_ix),
530	..
531	}) = brk
532	{
533	let table_cols = self.allocs[alignment_ix].len();
534	self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
535	// this clears out any stuff we may have appended - but there may
536	// be a cleaner way
537	self.tree[node_ix].child = None;
538	self.tree.pop();
539	self.tree.push();
540	if let Some(ix) = self.parse_table(table_cols, ix, next_ix) {
541	return ix;
542	}
543	}
544
545	ix = next_ix;
546	let mut line_start = LineStart::new(&bytes[ix..]);
547	let current_container = scan_containers(
548	&self.tree,
549	&mut line_start,
550	self.options.has_gfm_footnotes(),
551	) == self.tree.spine_len();
552	let trailing_backslash_pos = match brk {
553	Some(Item {
554	start,
555	body: ItemBody::HardBreak(`true`),
556	..
557	}) if bytes[start] == b'`\\`' => Some(start),
558	_ => None,
559	};
560	if !line_start.scan_space(`4`) {
561	let ix_new = ix + line_start.bytes_scanned();
562	if current_container {
563	if let Some(ix_setext) =
564	self.parse_setext_heading(ix_new, node_ix, trailing_backslash_pos.is_some())
565	{
566	if let Some(pos) = trailing_backslash_pos {
567	self.tree.append_text(pos, pos + `1`, `false`);
568	}
569	ix = ix_setext;
570	break;
571	}
572	}
573	// first check for non-empty lists, then for other interrupts
574	let suffix = &bytes[ix_new..];
575	if self.scan_paragraph_interrupt(suffix, current_container) {
576	if let Some(pos) = trailing_backslash_pos {
577	self.tree.append_text(pos, pos + `1`, `false`);
578	}
579	break;
580	}
581	}
582	line_start.scan_all_space();
583	if line_start.is_at_eol() {
584	if let Some(pos) = trailing_backslash_pos {
585	self.tree.append_text(pos, pos + `1`, `false`);
586	}
587	break;
588	}
589	ix = next_ix + line_start.bytes_scanned();
590	if let Some(item) = brk {
591	self.tree.append(item);
592	}
593	}
594
595	self.pop(ix);
596	ix
597	}
598
599	/// Returns end ix of setext_heading on success.
600	fn parse_setext_heading(
601	&mut self,
602	ix: usize,
603	node_ix: TreeIndex,
604	has_trailing_content: bool,
605	) -> Option<usize> {
606	let bytes = self.text.as_bytes();
607	let (n, level) = scan_setext_heading(&bytes[ix..])?;
608	let mut attrs = None;
609
610	if let Some(cur_ix) = self.tree.cur() {
611	let parent_ix = self.tree.peek_up().unwrap();
612	let header_start = self.tree[parent_ix].item.start;
613	// Note that `self.tree[parent_ix].item.end` might be zero at this point.
614	// Use the end position of the current node (i.e. the last known child
615	// of the parent) instead.
616	let header_end = self.tree[cur_ix].item.end;
617
618	// extract the trailing attribute block
619	let (content_end, attrs_) =
620	self.extract_and_parse_heading_attribute_block(header_start, header_end);
621	attrs = attrs_;
622
623	// strip trailing whitespace
624	let new_end = if has_trailing_content {
625	content_end
626	} else {
627	let trailing_ws =
628	scan_rev_while(&bytes[header_start..content_end], is_ascii_whitespace_no_nl);
629	content_end - trailing_ws
630	};
631
632	if attrs.is_some() {
633	// remove trailing block attributes
634	self.tree.truncate_siblings(new_end);
635	}
636
637	if let Some(cur_ix) = self.tree.cur() {
638	self.tree[cur_ix].item.end = new_end;
639	}
640	}
641
642	self.tree[node_ix].item.body = ItemBody::Heading(
643	level,
644	attrs.map(\|attrs\| self.allocs.allocate_heading(attrs)),
645	);
646
647	Some(ix + n)
648	}
649
650	/// Parse a line of input, appending text and items to tree.
651	///
652	/// Returns: index after line and an item representing the break.
653	fn parse_line(
654	&mut self,
655	start: usize,
656	end: Option<usize>,
657	mode: TableParseMode,
658	) -> (usize, Option<Item>) {
659	let bytes = self.text.as_bytes();
660	let bytes = match end {
661	Some(end) => &bytes[..end],
662	None => bytes,
663	};
664	let bytes_len = bytes.len();
665	let mut pipes = `0`;
666	let mut last_pipe_ix = start;
667	let mut begin_text = start;
668	let mut backslash_escaped = `false`;
669
670	let (final_ix, brk) = iterate_special_bytes(self.lookup_table, bytes, start, \|ix, byte\| {
671	match byte {
672	b'`\n`' \| b'`\r`' => {
673	if let TableParseMode::Active = mode {
674	return LoopInstruction::BreakAtWith(ix, None);
675	}
676
677	let mut i = ix;
678	let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
679
680	let end_ix = ix + eol_bytes;
681	let trailing_backslashes = scan_rev_while(&bytes[..ix], \|b\| b == b'`\\`');
682	if trailing_backslashes % `2` == `1` && end_ix < bytes_len {
683	i -= `1`;
684	self.tree.append_text(begin_text, i, backslash_escaped);
685	backslash_escaped = `false`;
686	return LoopInstruction::BreakAtWith(
687	end_ix,
688	Some(Item {
689	start: i,
690	end: end_ix,
691	body: ItemBody::HardBreak(`true`),
692	}),
693	);
694	}
695
696	if mode == TableParseMode::Scan && pipes > `0` {
697	// check if we may be parsing a table
698	let next_line_ix = ix + eol_bytes;
699	let mut line_start = LineStart::new(&bytes[next_line_ix..]);
700	if scan_containers(
701	&self.tree,
702	&mut line_start,
703	self.options.has_gfm_footnotes(),
704	) == self.tree.spine_len()
705	{
706	let table_head_ix = next_line_ix + line_start.bytes_scanned();
707	let (table_head_bytes, alignment) =
708	scan_table_head(&bytes[table_head_ix..]);
709
710	if table_head_bytes > `0` {
711	// computing header count from number of pipes
712	let header_count =
713	count_header_cols(bytes, pipes, start, last_pipe_ix);
714
715	// make sure they match the number of columns we find in separator line
716	if alignment.len() == header_count {
717	let alignment_ix = self.allocs.allocate_alignment(alignment);
718	let end_ix = table_head_ix + table_head_bytes;
719	return LoopInstruction::BreakAtWith(
720	end_ix,
721	Some(Item {
722	start: i,
723	end: end_ix, // must update later
724	body: ItemBody::Table(alignment_ix),
725	}),
726	);
727	}
728	}
729	}
730	}
731
732	let trailing_whitespace =
733	scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
734	if trailing_whitespace >= `2` {
735	i -= trailing_whitespace;
736	self.tree.append_text(begin_text, i, backslash_escaped);
737	backslash_escaped = `false`;
738	return LoopInstruction::BreakAtWith(
739	end_ix,
740	Some(Item {
741	start: i,
742	end: end_ix,
743	body: ItemBody::HardBreak(`false`),
744	}),
745	);
746	}
747
748	self.tree
749	.append_text(begin_text, ix - trailing_whitespace, backslash_escaped);
750	backslash_escaped = `false`;
751
752	LoopInstruction::BreakAtWith(
753	end_ix,
754	Some(Item {
755	start: i,
756	end: end_ix,
757	body: ItemBody::SoftBreak,
758	}),
759	)
760	}
761	b'`\\`' => {
762	if ix + `1` < bytes_len && is_ascii_punctuation(bytes[ix + `1`]) {
763	self.tree.append_text(begin_text, ix, backslash_escaped);
764	if bytes[ix + `1`] == b'`' {
765	let count = `1` + scan_ch_repeat(&bytes[(ix + `2`)..], b'`');
766	self.tree.append(Item {
767	start: ix + `1`,
768	end: ix + count + `1`,
769	body: ItemBody::MaybeCode(count, `true`),
770	});
771	begin_text = ix + `1` + count;
772	backslash_escaped = `false`;
773	LoopInstruction::ContinueAndSkip(count)
774	} else if bytes[ix + `1`] == b'\|' && TableParseMode::Active == mode {
775	// Yeah, it's super weird that backslash escaped pipes in tables aren't "real"
776	// backslash escapes.
777	//
778	// This tree structure is intended for the benefit of inline analysis, and it
779	// is supposed to operate as-if backslash escaped pipes were stripped out in a
780	// separate pass.
781	begin_text = ix + `1`;
782	backslash_escaped = `false`;
783	LoopInstruction::ContinueAndSkip(`1`)
784	} else if ix + `2` < bytes_len
785	&& bytes[ix + `1`] == b'`\\`'
786	&& bytes[ix + `2`] == b'\|'
787	&& TableParseMode::Active == mode
788	{
789	// To parse `\\\|`, discard the backslashes and parse the `\|` that follows it.
790	begin_text = ix + `2`;
791	backslash_escaped = `true`;
792	LoopInstruction::ContinueAndSkip(`2`)
793	} else {
794	begin_text = ix + `1`;
795	backslash_escaped = `true`;
796	LoopInstruction::ContinueAndSkip(`1`)
797	}
798	} else {
799	LoopInstruction::ContinueAndSkip(`0`)
800	}
801	}
802	c @ b'*' \| c @ b'_' \| c @ b'~' => {
803	let string_suffix = &self.text[ix..];
804	let count = `1` + scan_ch_repeat(&string_suffix.as_bytes()[`1`..], c);
805	let can_open = delim_run_can_open(
806	&self.text[start..],
807	string_suffix,
808	count,
809	ix - start,
810	mode,
811	);
812	let can_close = delim_run_can_close(
813	&self.text[start..],
814	string_suffix,
815	count,
816	ix - start,
817	mode,
818	);
819	let is_valid_seq = c != b'~' \|\| count <= `2`;
820
821	if (can_open \|\| can_close) && is_valid_seq {
822	self.tree.append_text(begin_text, ix, backslash_escaped);
823	backslash_escaped = `false`;
824	for i in `0`..count {
825	self.tree.append(Item {
826	start: ix + i,
827	end: ix + i + `1`,
828	body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
829	});
830	}
831	begin_text = ix + count;
832	}
833	LoopInstruction::ContinueAndSkip(count - `1`)
834	}
835	b'$' => {
836	let string_suffix = &self.text[ix..];
837	let can_open = !string_suffix[`1`..]
838	.as_bytes()
839	.first()
840	.copied()
841	.map_or(`true`, is_ascii_whitespace);
842	let can_close = ix > start
843	&& !self.text[..ix]
844	.as_bytes()
845	.last()
846	.copied()
847	.map_or(`true`, is_ascii_whitespace);
848
849	// 0xFFFF_FFFF... represents the root brace context. Using None would require
850	// storing Option<u8>, which is bigger than u8.
851	//
852	// These shouldn't conflict unless you have 255 levels of nesting, which is
853	// past the intended limit anyway.
854	//
855	// Unbalanced braces will cause the root to be changed, which is why it gets
856	// stored here.
857	let brace_context =
858	if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
859	self.brace_context_next as u8
860	} else {
861	self.brace_context_stack.last().copied().unwrap_or_else(\|\| {
862	self.brace_context_stack.push(!`0`);
863	!`0`
864	})
865	};
866
867	self.tree.append_text(begin_text, ix, backslash_escaped);
868	self.tree.append(Item {
869	start: ix,
870	end: ix + `1`,
871	body: ItemBody::MaybeMath(can_open, can_close, brace_context),
872	});
873	begin_text = ix + `1`;
874	LoopInstruction::ContinueAndSkip(`0`)
875	}
876	b'{' => {
877	if self.brace_context_stack.len() == MATH_BRACE_CONTEXT_MAX_NESTING {
878	self.brace_context_stack.push(self.brace_context_next as u8);
879	self.brace_context_next = MATH_BRACE_CONTEXT_MAX_NESTING;
880	} else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
881	// When we reach the limit of nesting, switch from actually matching
882	// braces to just counting them.
883	self.brace_context_next += `1`;
884	} else if !self.brace_context_stack.is_empty() {
885	// Store nothing if no math environment has been reached yet.
886	self.brace_context_stack.push(self.brace_context_next as u8);
887	self.brace_context_next += `1`;
888	}
889	LoopInstruction::ContinueAndSkip(`0`)
890	}
891	b'}' => {
892	if let &mut [ref mut top_level_context] = &mut self.brace_context_stack[..] {
893	// Unbalanced Braces
894	//
895	// The initial, root top-level brace context is -1, but this is changed whenever an unbalanced
896	// close brace is encountered:
897	//
898	// This is not a math environment: $}$
899	// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\|^
900	// -1 \|-2
901	//
902	// To ensure this can't get parsed as math, each side of the unbalanced
903	// brace is an irreversibly separate brace context. As long as the math
904	// environment itself contains balanced braces, they should share a top level context.
905	//
906	// Math environment contains 2+2: $}$2+2$
907	// ^^^ this is a math environment
908	*top_level_context = top_level_context.wrapping_sub(`1`);
909	} else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
910	// When we exceed 25 levels of nesting, switch from accurately balancing braces
911	// to just counting them. When we dip back below the limit, switch back.
912	if self.brace_context_next <= MATH_BRACE_CONTEXT_MAX_NESTING {
913	self.brace_context_stack.pop();
914	} else {
915	self.brace_context_next -= `1`;
916	}
917	} else {
918	self.brace_context_stack.pop();
919	}
920	LoopInstruction::ContinueAndSkip(`0`)
921	}
922	b'`' => {
923	self.tree.append_text(begin_text, ix, backslash_escaped);
924	backslash_escaped = `false`;
925	let count = `1` + scan_ch_repeat(&bytes[(ix + `1`)..], b'`');
926	self.tree.append(Item {
927	start: ix,
928	end: ix + count,
929	body: ItemBody::MaybeCode(count, `false`),
930	});
931	begin_text = ix + count;
932	LoopInstruction::ContinueAndSkip(count - `1`)
933	}
934	b'<' if bytes.get(ix + `1`) != Some(&b'`\\`') => {
935	// Note: could detect some non-HTML cases and early escape here, but not
936	// clear that's a win.
937	self.tree.append_text(begin_text, ix, backslash_escaped);
938	backslash_escaped = `false`;
939	self.tree.append(Item {
940	start: ix,
941	end: ix + `1`,
942	body: ItemBody::MaybeHtml,
943	});
944	begin_text = ix + `1`;
945	LoopInstruction::ContinueAndSkip(`0`)
946	}
947	b'!' => {
948	if ix + `1` < bytes_len && bytes[ix + `1`] == b'[' {
949	self.tree.append_text(begin_text, ix, backslash_escaped);
950	backslash_escaped = `false`;
951	self.tree.append(Item {
952	start: ix,
953	end: ix + `2`,
954	body: ItemBody::MaybeImage,
955	});
956	begin_text = ix + `2`;
957	LoopInstruction::ContinueAndSkip(`1`)
958	} else {
959	LoopInstruction::ContinueAndSkip(`0`)
960	}
961	}
962	b'[' => {
963	self.tree.append_text(begin_text, ix, backslash_escaped);
964	backslash_escaped = `false`;
965	self.tree.append(Item {
966	start: ix,
967	end: ix + `1`,
968	body: ItemBody::MaybeLinkOpen,
969	});
970	begin_text = ix + `1`;
971	LoopInstruction::ContinueAndSkip(`0`)
972	}
973	b']' => {
974	self.tree.append_text(begin_text, ix, backslash_escaped);
975	backslash_escaped = `false`;
976	self.tree.append(Item {
977	start: ix,
978	end: ix + `1`,
979	body: ItemBody::MaybeLinkClose(`true`),
980	});
981	begin_text = ix + `1`;
982	LoopInstruction::ContinueAndSkip(`0`)
983	}
984	b'&' => match scan_entity(&bytes[ix..]) {
985	(n, Some(value)) => {
986	self.tree.append_text(begin_text, ix, backslash_escaped);
987	backslash_escaped = `false`;
988	self.tree.append(Item {
989	start: ix,
990	end: ix + n,
991	body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
992	});
993	begin_text = ix + n;
994	LoopInstruction::ContinueAndSkip(n - `1`)
995	}
996	_ => LoopInstruction::ContinueAndSkip(`0`),
997	},
998	b'\|' => {
999	if ix != `0` && bytes[ix - `1`] == b'`\\`' {
1000	LoopInstruction::ContinueAndSkip(`0`)
1001	} else if let TableParseMode::Active = mode {
1002	LoopInstruction::BreakAtWith(ix, None)
1003	} else {
1004	last_pipe_ix = ix;
1005	pipes += `1`;
1006	LoopInstruction::ContinueAndSkip(`0`)
1007	}
1008	}
1009	b'.' => {
1010	if ix + `2` < bytes.len() && bytes[ix + `1`] == b'.' && bytes[ix + `2`] == b'.' {
1011	self.tree.append_text(begin_text, ix, backslash_escaped);
1012	backslash_escaped = `false`;
1013	self.tree.append(Item {
1014	start: ix,
1015	end: ix + `3`,
1016	body: ItemBody::SynthesizeChar('…'),
1017	});
1018	begin_text = ix + `3`;
1019	LoopInstruction::ContinueAndSkip(`2`)
1020	} else {
1021	LoopInstruction::ContinueAndSkip(`0`)
1022	}
1023	}
1024	b'-' => {
1025	let count = `1` + scan_ch_repeat(&bytes[(ix + `1`)..], b'-');
1026	if count == `1` {
1027	LoopInstruction::ContinueAndSkip(`0`)
1028	} else {
1029	let itembody = if count == `2` {
1030	ItemBody::SynthesizeChar('–')
1031	} else if count == `3` {
1032	ItemBody::SynthesizeChar('—')
1033	} else {
1034	let (ems, ens) = match count % `6` {
1035	`0` \| `3` => (count / `3`, `0`),
1036	`2` \| `4` => (`0`, count / `2`),
1037	`1` => (count / `3` - `1`, `2`),
1038	_ => (count / `3`, `1`),
1039	};
1040	// – and — are 3 bytes each in utf8
1041	let mut buf = String::with_capacity(`3` * (ems + ens));
1042	for _ in `0`..ems {
1043	buf.push('—');
1044	}
1045	for _ in `0`..ens {
1046	buf.push('–');
1047	}
1048	ItemBody::SynthesizeText(self.allocs.allocate_cow(buf.into()))
1049	};
1050
1051	self.tree.append_text(begin_text, ix, backslash_escaped);
1052	backslash_escaped = `false`;
1053	self.tree.append(Item {
1054	start: ix,
1055	end: ix + count,
1056	body: itembody,
1057	});
1058	begin_text = ix + count;
1059	LoopInstruction::ContinueAndSkip(count - `1`)
1060	}
1061	}
1062	c @ b'`\'`' \| c @ b'"' => {
1063	let string_suffix = &self.text[ix..];
1064	let can_open =
1065	delim_run_can_open(&self.text[start..], string_suffix, `1`, ix - start, mode);
1066	let can_close = delim_run_can_close(
1067	&self.text[start..],
1068	string_suffix,
1069	`1`,
1070	ix - start,
1071	mode,
1072	);
1073
1074	self.tree.append_text(begin_text, ix, backslash_escaped);
1075	backslash_escaped = `false`;
1076	self.tree.append(Item {
1077	start: ix,
1078	end: ix + `1`,
1079	body: ItemBody::MaybeSmartQuote(c, can_open, can_close),
1080	});
1081	begin_text = ix + `1`;
1082
1083	LoopInstruction::ContinueAndSkip(`0`)
1084	}
1085	_ => LoopInstruction::ContinueAndSkip(`0`),
1086	}
1087	});
1088
1089	if brk.is_none() {
1090	let trailing_whitespace =
1091	scan_rev_while(&bytes[begin_text..final_ix], is_ascii_whitespace_no_nl);
1092	// need to close text at eof
1093	self.tree.append_text(
1094	begin_text,
1095	final_ix - trailing_whitespace,
1096	backslash_escaped,
1097	);
1098	}
1099	(final_ix, brk)
1100	}
1101
1102	/// When start_ix is at the beginning of an HTML block of type 1 to 5,
1103	/// this will find the end of the block, adding the block itself to the
1104	/// tree and also keeping track of the lines of HTML within the block.
1105	///
1106	/// The html_end_tag is the tag that must be found on a line to end the block.
1107	fn parse_html_block_type_1_to_5(
1108	&mut self,
1109	start_ix: usize,
1110	html_end_tag: &str,
1111	mut remaining_space: usize,
1112	mut indent: usize,
1113	) -> usize {
1114	self.tree.append(Item {
1115	start: start_ix,
1116	end: `0`, // set later
1117	body: ItemBody::HtmlBlock,
1118	});
1119	self.tree.push();
1120
1121	let bytes = self.text.as_bytes();
1122	let mut ix = start_ix;
1123	let end_ix;
1124	loop {
1125	let line_start_ix = ix;
1126	ix += scan_nextline(&bytes[ix..]);
1127	self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1128
1129	let mut line_start = LineStart::new(&bytes[ix..]);
1130	let n_containers = scan_containers(
1131	&self.tree,
1132	&mut line_start,
1133	self.options.has_gfm_footnotes(),
1134	);
1135	if n_containers < self.tree.spine_len() {
1136	end_ix = ix;
1137	break;
1138	}
1139
1140	if self.text[line_start_ix..ix].contains(html_end_tag) {
1141	end_ix = ix;
1142	break;
1143	}
1144
1145	let next_line_ix = ix + line_start.bytes_scanned();
1146	if next_line_ix == self.text.len() {
1147	end_ix = next_line_ix;
1148	break;
1149	}
1150	ix = next_line_ix;
1151	remaining_space = line_start.remaining_space();
1152	indent = `0`;
1153	}
1154	self.pop(end_ix);
1155	ix
1156	}
1157
1158	/// When start_ix is at the beginning of an HTML block of type 6 or 7,
1159	/// this will consume lines until there is a blank line and keep track of
1160	/// the HTML within the block.
1161	fn parse_html_block_type_6_or_7(
1162	&mut self,
1163	start_ix: usize,
1164	mut remaining_space: usize,
1165	mut indent: usize,
1166	) -> usize {
1167	self.tree.append(Item {
1168	start: start_ix,
1169	end: `0`, // set later
1170	body: ItemBody::HtmlBlock,
1171	});
1172	self.tree.push();
1173
1174	let bytes = self.text.as_bytes();
1175	let mut ix = start_ix;
1176	let end_ix;
1177	loop {
1178	let line_start_ix = ix;
1179	ix += scan_nextline(&bytes[ix..]);
1180	self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1181
1182	let mut line_start = LineStart::new(&bytes[ix..]);
1183	let n_containers = scan_containers(
1184	&self.tree,
1185	&mut line_start,
1186	self.options.has_gfm_footnotes(),
1187	);
1188	if n_containers < self.tree.spine_len() \|\| line_start.is_at_eol() {
1189	end_ix = ix;
1190	break;
1191	}
1192
1193	let next_line_ix = ix + line_start.bytes_scanned();
1194	if next_line_ix == self.text.len() \|\| scan_blank_line(&bytes[next_line_ix..]).is_some()
1195	{
1196	end_ix = next_line_ix;
1197	break;
1198	}
1199	ix = next_line_ix;
1200	remaining_space = line_start.remaining_space();
1201	indent = `0`;
1202	}
1203	self.pop(end_ix);
1204	ix
1205	}
1206
1207	fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
1208	self.tree.append(Item {
1209	start: start_ix,
1210	end: `0`, // will get set later
1211	body: ItemBody::IndentCodeBlock,
1212	});
1213	self.tree.push();
1214	let bytes = self.text.as_bytes();
1215	let mut last_nonblank_child = None;
1216	let mut last_nonblank_ix = `0`;
1217	let mut end_ix = `0`;
1218	self.last_line_blank = `false`;
1219
1220	let mut ix = start_ix;
1221	loop {
1222	let line_start_ix = ix;
1223	ix += scan_nextline(&bytes[ix..]);
1224	self.append_code_text(remaining_space, line_start_ix, ix);
1225	// TODO(spec clarification): should we synthesize newline at EOF?
1226
1227	if !self.last_line_blank {
1228	last_nonblank_child = self.tree.cur();
1229	last_nonblank_ix = ix;
1230	end_ix = ix;
1231	}
1232
1233	let mut line_start = LineStart::new(&bytes[ix..]);
1234	let n_containers = scan_containers(
1235	&self.tree,
1236	&mut line_start,
1237	self.options.has_gfm_footnotes(),
1238	);
1239	if n_containers < self.tree.spine_len()
1240	\|\| !(line_start.scan_space(`4`) \|\| line_start.is_at_eol())
1241	{
1242	break;
1243	}
1244	let next_line_ix = ix + line_start.bytes_scanned();
1245	if next_line_ix == self.text.len() {
1246	break;
1247	}
1248	ix = next_line_ix;
1249	remaining_space = line_start.remaining_space();
1250	self.last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1251	}
1252
1253	// Trim trailing blank lines.
1254	if let Some(child) = last_nonblank_child {
1255	self.tree[child].next = None;
1256	self.tree[child].item.end = last_nonblank_ix;
1257	}
1258	self.pop(end_ix);
1259	ix
1260	}
1261
1262	fn parse_fenced_code_block(
1263	&mut self,
1264	start_ix: usize,
1265	indent: usize,
1266	fence_ch: u8,
1267	n_fence_char: usize,
1268	) -> usize {
1269	let bytes = self.text.as_bytes();
1270	let mut info_start = start_ix + n_fence_char;
1271	info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1272	// TODO: info strings are typically very short. wouldn't it be faster
1273	// to just do a forward scan here?
1274	let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1275	let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1276	let info_string = unescape(&self.text[info_start..info_end], self.tree.is_in_table());
1277	self.tree.append(Item {
1278	start: start_ix,
1279	end: `0`, // will get set later
1280	body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1281	});
1282	self.tree.push();
1283	loop {
1284	let mut line_start = LineStart::new(&bytes[ix..]);
1285	let n_containers = scan_containers(
1286	&self.tree,
1287	&mut line_start,
1288	self.options.has_gfm_footnotes(),
1289	);
1290	if n_containers < self.tree.spine_len() {
1291	// this line will get parsed again as not being part of the code
1292	// if it's blank, it should be parsed as a blank line
1293	self.pop(ix);
1294	return ix;
1295	}
1296	line_start.scan_space(indent);
1297	let mut close_line_start = line_start.clone();
1298	if !close_line_start.scan_space(`4` - indent) {
1299	let close_ix = ix + close_line_start.bytes_scanned();
1300	if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1301	{
1302	ix = close_ix + n;
1303	self.pop(ix);
1304	// try to read trailing whitespace or it will register as a completely blank line
1305	return ix + scan_blank_line(&bytes[ix..]).unwrap_or(`0`);
1306	}
1307	}
1308	let remaining_space = line_start.remaining_space();
1309	ix += line_start.bytes_scanned();
1310	let next_ix = ix + scan_nextline(&bytes[ix..]);
1311	self.append_code_text(remaining_space, ix, next_ix);
1312	ix = next_ix;
1313	}
1314	}
1315
1316	fn parse_metadata_block(&mut self, start_ix: usize, metadata_block_ch: u8) -> usize {
1317	let bytes = self.text.as_bytes();
1318	let metadata_block_kind = match metadata_block_ch {
1319	b'-' => MetadataBlockKind::YamlStyle,
1320	b'+' => MetadataBlockKind::PlusesStyle,
1321	_ => panic!("Erroneous metadata block character when parsing metadata block"),
1322	};
1323	// 3 delimiter characters
1324	let mut ix = start_ix + `3` + scan_nextline(&bytes[start_ix + `3`..]);
1325	self.tree.append(Item {
1326	start: start_ix,
1327	end: `0`, // will get set later
1328	body: ItemBody::MetadataBlock(metadata_block_kind),
1329	});
1330	self.tree.push();
1331	loop {
1332	let mut line_start = LineStart::new(&bytes[ix..]);
1333	let n_containers = scan_containers(
1334	&self.tree,
1335	&mut line_start,
1336	self.options.has_gfm_footnotes(),
1337	);
1338	if n_containers < self.tree.spine_len() {
1339	break;
1340	}
1341	if let (_, `0`) = calc_indent(&bytes[ix..], `4`) {
1342	if let Some(n) = scan_closing_metadata_block(&bytes[ix..], metadata_block_ch) {
1343	ix += n;
1344	break;
1345	}
1346	}
1347	let remaining_space = line_start.remaining_space();
1348	ix += line_start.bytes_scanned();
1349	let next_ix = ix + scan_nextline(&bytes[ix..]);
1350	self.append_code_text(remaining_space, ix, next_ix);
1351	ix = next_ix;
1352	}
1353
1354	self.pop(ix);
1355
1356	// try to read trailing whitespace or it will register as a completely blank line
1357	ix + scan_blank_line(&bytes[ix..]).unwrap_or(`0`)
1358	}
1359
1360	fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1361	if remaining_space > `0` {
1362	let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1363	self.tree.append(Item {
1364	start,
1365	end: start,
1366	body: ItemBody::SynthesizeText(cow_ix),
1367	});
1368	}
1369	if self.text.as_bytes()[end - `2`] == b'`\r`' {
1370	// Normalize CRLF to LF
1371	self.tree.append_text(start, end - `2`, `false`);
1372	self.tree.append_text(end - `1`, end, `false`);
1373	} else {
1374	self.tree.append_text(start, end, `false`);
1375	}
1376	}
1377
1378	/// Appends a line of HTML to the tree.
1379	fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1380	if remaining_space > `0` {
1381	let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1382	self.tree.append(Item {
1383	start,
1384	end: start,
1385	body: ItemBody::SynthesizeText(cow_ix),
1386	});
1387	}
1388	if self.text.as_bytes()[end - `2`] == b'`\r`' {
1389	// Normalize CRLF to LF
1390	self.tree.append(Item {
1391	start,
1392	end: end - `2`,
1393	body: ItemBody::Html,
1394	});
1395	self.tree.append(Item {
1396	start: end - `1`,
1397	end,
1398	body: ItemBody::Html,
1399	});
1400	} else {
1401	self.tree.append(Item {
1402	start,
1403	end,
1404	body: ItemBody::Html,
1405	});
1406	}
1407	}
1408
1409	/// Pop a container, setting its end.
1410	fn pop(&mut self, ix: usize) {
1411	let cur_ix = self.tree.pop().unwrap();
1412	self.tree[cur_ix].item.end = ix;
1413	if let ItemBody::List(`true`, _, _) = self.tree[cur_ix].item.body {
1414	surgerize_tight_list(&mut self.tree, cur_ix);
1415	self.begin_list_item = None;
1416	}
1417	}
1418
1419	/// Close a list if it's open. Also set loose if last line was blank
1420	/// and end current list if it's a lone, empty item
1421	fn finish_list(&mut self, ix: usize) {
1422	self.finish_empty_list_item();
1423	if let Some(node_ix) = self.tree.peek_up() {
1424	if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1425	self.pop(ix);
1426	}
1427	}
1428	if self.last_line_blank {
1429	if let Some(node_ix) = self.tree.peek_grandparent() {
1430	if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
1431	*is_tight = `false`;
1432	}
1433	}
1434	self.last_line_blank = `false`;
1435	}
1436	}
1437
1438	fn finish_empty_list_item(&mut self) {
1439	if let Some(begin_list_item) = self.begin_list_item {
1440	if self.last_line_blank {
1441	// A list item can begin with at most one blank line.
1442	if let Some(node_ix) = self.tree.peek_up() {
1443	if let ItemBody::ListItem(_) = self.tree[node_ix].item.body {
1444	self.pop(begin_list_item);
1445	}
1446	}
1447	}
1448	}
1449	self.begin_list_item = None;
1450	}
1451
1452	/// Continue an existing list or start a new one if there's not an open
1453	/// list that matches.
1454	fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1455	self.finish_empty_list_item();
1456	if let Some(node_ix) = self.tree.peek_up() {
1457	if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1458	if existing_ch == ch {
1459	if self.last_line_blank {
1460	*is_tight = `false`;
1461	self.last_line_blank = `false`;
1462	}
1463	return;
1464	}
1465	}
1466	// TODO: this is not the best choice for end; maybe get end from last list item.
1467	self.finish_list(start);
1468	}
1469	self.tree.append(Item {
1470	start,
1471	end: `0`, // will get set later
1472	body: ItemBody::List(`true`, ch, index),
1473	});
1474	self.tree.push();
1475	self.last_line_blank = `false`;
1476	}
1477
1478	/// Parse a thematic break.
1479	///
1480	/// Returns index of start of next line.
1481	fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1482	self.tree.append(Item {
1483	start: ix,
1484	end: ix + hrule_size,
1485	body: ItemBody::Rule,
1486	});
1487	ix + hrule_size
1488	}
1489
1490	/// Parse an ATX heading.
1491	///
1492	/// Returns index of start of next line.
1493	fn parse_atx_heading(&mut self, start: usize, atx_level: HeadingLevel) -> usize {
1494	let mut ix = start;
1495	let heading_ix = self.tree.append(Item {
1496	start,
1497	end: `0`, // set later
1498	body: ItemBody::default(), // set later
1499	});
1500	ix += atx_level as usize;
1501	// next char is space or eol (guaranteed by scan_atx_heading)
1502	let bytes = self.text.as_bytes();
1503	if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1504	self.tree[heading_ix].item.end = ix + eol_bytes;
1505	self.tree[heading_ix].item.body = ItemBody::Heading(atx_level, None);
1506	return ix + eol_bytes;
1507	}
1508	// skip leading spaces
1509	let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1510	ix += skip_spaces;
1511
1512	// now handle the header text
1513	let header_start = ix;
1514	let header_node_idx = self.tree.push(); // so that we can set the endpoint later
1515
1516	// trim the trailing attribute block before parsing the entire line, if necessary
1517	let (end, content_end, attrs) = if self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES)
1518	{
1519	// the start of the next line is the end of the header since the
1520	// header cannot have line breaks
1521	let header_end = header_start + scan_nextline(&bytes[header_start..]);
1522	let (content_end, attrs) =
1523	self.extract_and_parse_heading_attribute_block(header_start, header_end);
1524	self.parse_line(ix, Some(content_end), TableParseMode::Disabled);
1525	(header_end, content_end, attrs)
1526	} else {
1527	let (line_ix, line_brk) = self.parse_line(ix, None, TableParseMode::Disabled);
1528	ix = line_ix;
1529	// Backslash at end is actually hard line break
1530	if let Some(Item {
1531	start,
1532	end,
1533	body: ItemBody::HardBreak(`true`),
1534	}) = line_brk
1535	{
1536	self.tree.append_text(start, end, `false`);
1537	}
1538	(ix, ix, None)
1539	};
1540	self.tree[header_node_idx].item.end = end;
1541
1542	// remove trailing matter from header text
1543	let mut empty_text_node = `false`;
1544	if let Some(cur_ix) = self.tree.cur() {
1545	// remove closing of the ATX heading
1546	let header_text = &bytes[header_start..content_end];
1547	let mut limit = header_text
1548	.iter()
1549	.rposition(\|&b\| !(b == b'`\n`' \|\| b == b'`\r`' \|\| b == b' '))
1550	.map_or(`0`, \|i\| i + `1`);
1551	let closer = header_text[..limit]
1552	.iter()
1553	.rposition(\|&b\| b != b'#')
1554	.map_or(`0`, \|i\| i + `1`);
1555	if closer == `0` {
1556	limit = closer;
1557	} else {
1558	let spaces = scan_rev_while(&header_text[..closer], \|b\| b == b' ');
1559	if spaces > `0` {
1560	limit = closer - spaces;
1561	}
1562	}
1563	// if text is only spaces, then remove them
1564	self.tree[cur_ix].item.end = limit + header_start;
1565
1566	// limit = 0 when text is empty after removing spaces
1567	if limit == `0` {
1568	empty_text_node = `true`;
1569	}
1570	}
1571
1572	if empty_text_node {
1573	self.tree.remove_node();
1574	} else {
1575	self.tree.pop();
1576	}
1577	self.tree[heading_ix].item.body = ItemBody::Heading(
1578	atx_level,
1579	attrs.map(\|attrs\| self.allocs.allocate_heading(attrs)),
1580	);
1581
1582	end
1583	}
1584
1585	/// Returns the number of bytes scanned on success.
1586	fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1587	let bytes = &self.text.as_bytes()[start..];
1588	if !bytes.starts_with(b"[^") {
1589	return None;
1590	}
1591	let (mut i, label) = if self.options.has_gfm_footnotes() {
1592	// GitHub doesn't allow footnote definition labels to contain line breaks.
1593	// It actually does allow this for link definitions under certain circumstances,
1594	// but for this it's simpler to avoid it.
1595	scan_link_label_rest(&self.text[start + `2`..], &\|_\| None, self.tree.is_in_table())?
1596	} else {
1597	self.parse_refdef_label(start + `2`)?
1598	};
1599	if self.options.has_gfm_footnotes() && label.bytes().any(\|b\| b == b'`\r`' \|\| b == b'`\n`') {
1600	// GitHub doesn't allow footnote definition labels to contain line breaks,
1601	// even if they're escaped.
1602	return None;
1603	}
1604	i += `2`;
1605	if scan_ch(&bytes[i..], b':') == `0` {
1606	return None;
1607	}
1608	i += `1`;
1609	self.finish_list(start);
1610	if let Some(node_ix) = self.tree.peek_up() {
1611	if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
1612	// finish previous footnote if it's still open
1613	self.pop(start);
1614	}
1615	}
1616	if self.options.has_gfm_footnotes() {
1617	i += scan_whitespace_no_nl(&bytes[i..]);
1618	}
1619	self.allocs.footdefs.0.insert(
1620	UniCase::new(label.clone()),
1621	FootnoteDef { use_count: `0` },
1622	);
1623	self.tree.append(Item {
1624	start,
1625	end: `0`, // will get set later
1626	// TODO: check whether the label here is strictly necessary
1627	body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1628	});
1629	self.tree.push();
1630	Some(i)
1631	}
1632
1633	/// Tries to parse a reference label, which can be interrupted by new blocks.
1634	/// On success, returns the number of bytes of the label and the label itself.
1635	fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1636	scan_link_label_rest(
1637	&self.text[start..],
1638	&\|bytes\| {
1639	let mut line_start = LineStart::new(bytes);
1640	let current_container = scan_containers(
1641	&self.tree,
1642	&mut line_start,
1643	self.options.has_gfm_footnotes(),
1644	) == self.tree.spine_len();
1645	if line_start.scan_space(`4`) {
1646	return Some(line_start.bytes_scanned());
1647	}
1648	let bytes_scanned = line_start.bytes_scanned();
1649	let suffix = &bytes[bytes_scanned..];
1650	if self.scan_paragraph_interrupt(suffix, current_container)
1651	\|\| (current_container && scan_setext_heading(suffix).is_some())
1652	{
1653	None
1654	} else {
1655	Some(bytes_scanned)
1656	}
1657	},
1658	self.tree.is_in_table(),
1659	)
1660	}
1661
1662	/// Returns number of bytes scanned, label and definition on success.
1663	fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1664	let bytes = &self.text.as_bytes()[start..];
1665	if scan_ch(bytes, b'[') == `0` {
1666	return None;
1667	}
1668	let (mut i, label) = self.parse_refdef_label(start + `1`)?;
1669	i += `1`;
1670	if scan_ch(&bytes[i..], b':') == `0` {
1671	return None;
1672	}
1673	i += `1`;
1674	let (bytecount, link_def) = self.scan_refdef(start, start + i)?;
1675	Some((bytecount + i, UniCase::new(label), link_def))
1676	}
1677
1678	/// Returns number of bytes and number of newlines
1679	fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1680	let mut newlines = `0`;
1681	loop {
1682	let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1683	i += whitespaces;
1684	if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1685	i += eol_bytes;
1686	newlines += `1`;
1687	if newlines > `1` {
1688	return None;
1689	}
1690	} else {
1691	break;
1692	}
1693	let mut line_start = LineStart::new(&bytes[i..]);
1694	let current_container = scan_containers(
1695	&self.tree,
1696	&mut line_start,
1697	self.options.has_gfm_footnotes(),
1698	) == self.tree.spine_len();
1699	if !line_start.scan_space(`4`) {
1700	let suffix = &bytes[i + line_start.bytes_scanned()..];
1701	if self.scan_paragraph_interrupt(suffix, current_container)
1702	\|\| scan_setext_heading(suffix).is_some()
1703	{
1704	return None;
1705	}
1706	}
1707	i += line_start.bytes_scanned();
1708	}
1709	Some((i, newlines))
1710	}
1711
1712	// returns (bytelength, title_str)
1713	fn scan_refdef_title<'t>(&self, text: &'t str) -> Option<(usize, CowStr<'t>)> {
1714	let bytes = text.as_bytes();
1715	let closing_delim = match bytes.first()? {
1716	b'`\'`' => b'`\'`',
1717	b'"' => b'"',
1718	b'(' => b')',
1719	_ => return None,
1720	};
1721	let mut bytecount = `1`;
1722	let mut linestart = `1`;
1723
1724	let mut linebuf = None;
1725
1726	while let Some(&c) = bytes.get(bytecount) {
1727	match c {
1728	b'(' if closing_delim == b')' => {
1729	// https://spec.commonmark.org/0.30/#link-title
1730	// a sequence of zero or more characters between matching parentheses ((...)),
1731	// including a ( or ) character only if it is backslash-escaped.
1732	return None;
1733	}
1734	b'`\n`' \| b'`\r`' => {
1735	// push text to line buffer
1736	// this is used to strip the block formatting:
1737	//
1738	// > [first]: http://example.com "
1739	// > second"
1740	//
1741	// should get turned into `<a href="example.com" title=" second">first</a>`
1742	let linebuf = if let Some(linebuf) = &mut linebuf {
1743	linebuf
1744	} else {
1745	linebuf = Some(String::new());
1746	linebuf.as_mut().unwrap()
1747	};
1748	linebuf.push_str(&text[linestart..bytecount]);
1749	linebuf.push('`\n`'); // normalize line breaks
1750	// skip line break
1751	bytecount += `1`;
1752	if c == b'`\r`' && bytes.get(bytecount) == Some(&b'`\n`') {
1753	bytecount += `1`;
1754	}
1755	let mut line_start = LineStart::new(&bytes[bytecount..]);
1756	let current_container = scan_containers(
1757	&self.tree,
1758	&mut line_start,
1759	self.options.has_gfm_footnotes(),
1760	) == self.tree.spine_len();
1761	if !line_start.scan_space(`4`) {
1762	let suffix = &bytes[bytecount + line_start.bytes_scanned()..];
1763	if self.scan_paragraph_interrupt(suffix, current_container)
1764	\|\| scan_setext_heading(suffix).is_some()
1765	{
1766	return None;
1767	}
1768	}
1769	line_start.scan_all_space();
1770	bytecount += line_start.bytes_scanned();
1771	linestart = bytecount;
1772	if scan_blank_line(&bytes[bytecount..]).is_some() {
1773	// blank line - not allowed
1774	return None;
1775	}
1776	}
1777	b'`\\`' => {
1778	bytecount += `1`;
1779	if let Some(c) = bytes.get(bytecount) {
1780	if c != &b'`\r`' && c != &b'`\n`' {
1781	bytecount += `1`;
1782	}
1783	}
1784	}
1785	c if c == closing_delim => {
1786	let cow = if let Some(mut linebuf) = linebuf {
1787	linebuf.push_str(&text[linestart..bytecount]);
1788	CowStr::from(linebuf)
1789	} else {
1790	CowStr::from(&text[linestart..bytecount])
1791	};
1792	return Some((bytecount + `1`, cow));
1793	}
1794	_ => {
1795	bytecount += `1`;
1796	}
1797	}
1798	}
1799	None
1800	}
1801
1802	/// Returns # of bytes and definition.
1803	/// Assumes the label of the reference including colon has already been scanned.
1804	fn scan_refdef(&self, span_start: usize, start: usize) -> Option<(usize, LinkDef<'a>)> {
1805	let bytes = self.text.as_bytes();
1806
1807	// whitespace between label and url (including up to one newline)
1808	let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1809
1810	// scan link dest
1811	let (dest_length, dest) = scan_link_dest(self.text, i, LINK_MAX_NESTED_PARENS)?;
1812	if dest_length == `0` {
1813	return None;
1814	}
1815	let dest = unescape(dest, self.tree.is_in_table());
1816	i += dest_length;
1817
1818	// no title
1819	let mut backup = (
1820	i - start,
1821	LinkDef {
1822	dest,
1823	title: None,
1824	span: span_start..i,
1825	},
1826	);
1827
1828	// scan whitespace between dest and label
1829	let (mut i, newlines) =
1830	if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1831	if i == self.text.len() {
1832	newlines += `1`;
1833	}
1834	if new_i == i && newlines == `0` {
1835	return None;
1836	}
1837	if newlines > `1` {
1838	return Some(backup);
1839	};
1840	(new_i, newlines)
1841	} else {
1842	return Some(backup);
1843	};
1844
1845	// scan title
1846	// if this fails but newline == 1, return also a refdef without title
1847	if let Some((title_length, title)) = self.scan_refdef_title(&self.text[i..]) {
1848	i += title_length;
1849	if scan_blank_line(&bytes[i..]).is_some() {
1850	backup.0 = i - start;
1851	backup.1.span = span_start..i;
1852	backup.1.title = Some(unescape(title, self.tree.is_in_table()));
1853	return Some(backup);
1854	}
1855	}
1856	if newlines > `0` {
1857	Some(backup)
1858	} else {
1859	None
1860	}
1861	}
1862
1863	/// Checks whether we should break a paragraph on the given input.
1864	fn scan_paragraph_interrupt(&self, bytes: &[u8], current_container: bool) -> bool {
1865	let has_footnote = self.options.contains(Options::ENABLE_FOOTNOTES);
1866	if scan_paragraph_interrupt_no_table(bytes, current_container, has_footnote, &self.tree) {
1867	return `true`;
1868	}
1869	// pulldown-cmark allows heavy tables, that have a `\|` on the header row,
1870	// to interrupt paragraphs.
1871	//
1872	// ```markdown
1873	// This is a table
1874	// \| a \| b \| c \|
1875	// \|---\|---\|---\|
1876	// \| d \| e \| f \|
1877	//
1878	// This is not a table
1879	// a \| b \| c
1880	// ---\|---\|---
1881	// d \| e \| f
1882	// ```
1883	if !self.options.contains(Options::ENABLE_TABLES) \|\| !bytes.starts_with(b"\|") {
1884	return `false`;
1885	}
1886
1887	// Checking if something's a valid table or not requires looking at two lines.
1888	// First line, count unescaped pipes.
1889	let mut pipes = `0`;
1890	let mut next_line_ix = `0`;
1891	let mut bsesc = `false`;
1892	let mut last_pipe_ix = `0`;
1893	for (i, &byte) in bytes.iter().enumerate() {
1894	match byte {
1895	b'`\\`' => {
1896	bsesc = `true`;
1897	continue;
1898	}
1899	b'\|' if !bsesc => {
1900	pipes += `1`;
1901	last_pipe_ix = i;
1902	}
1903	b'`\r`' \| b'`\n`' => {
1904	next_line_ix = i + scan_eol(&bytes[i..]).unwrap();
1905	break;
1906	}
1907	_ => {}
1908	}
1909	bsesc = `false`;
1910	}
1911
1912	// scan_eol can't return 0, so this can't be zero
1913	if next_line_ix == `0` {
1914	return `false`;
1915	}
1916
1917	// Scan the table head. The part that looks like:
1918	//
1919	// \|---\|---\|---\|
1920	//
1921	// Also scan any containing items, since it's on its own line, and
1922	// might be nested inside a block quote or something
1923	//
1924	// > Table: First
1925	// > \| first col \| second col \|
1926	// > \|-----------\|------------\|
1927	// ^
1928	// \| need to skip over the `>` when checking for the table
1929	let mut line_start = LineStart::new(&bytes[next_line_ix..]);
1930	if scan_containers(
1931	&self.tree,
1932	&mut line_start,
1933	self.options.has_gfm_footnotes(),
1934	) != self.tree.spine_len()
1935	{
1936	return `false`;
1937	}
1938	let table_head_ix = next_line_ix + line_start.bytes_scanned();
1939	let (table_head_bytes, alignment) = scan_table_head(&bytes[table_head_ix..]);
1940
1941	if table_head_bytes == `0` {
1942	return `false`;
1943	}
1944
1945	// computing header count from number of pipes
1946	let header_count = count_header_cols(bytes, pipes, `0`, last_pipe_ix);
1947
1948	// make sure they match the number of columns we find in separator line
1949	alignment.len() == header_count
1950	}
1951
1952	/// Extracts and parses a heading attribute block if exists.
1953	///
1954	/// Returns `(end_offset_of_heading_content, (id, classes))`.
1955	///
1956	/// If `header_end` is less than or equal to `header_start`, the given
1957	/// input is considered as empty.
1958	fn extract_and_parse_heading_attribute_block(
1959	&mut self,
1960	header_start: usize,
1961	header_end: usize,
1962	) -> (usize, Option<HeadingAttributes<'a>>) {
1963	if !self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) {
1964	return (header_end, None);
1965	}
1966
1967	// extract the trailing attribute block
1968	let header_bytes = &self.text.as_bytes()[header_start..header_end];
1969	let (content_len, attr_block_range_rel) =
1970	extract_attribute_block_content_from_header_text(header_bytes);
1971	let content_end = header_start + content_len;
1972	let attrs = attr_block_range_rel.and_then(\|r\| {
1973	parse_inside_attribute_block(
1974	&self.text[(header_start + r.start)..(header_start + r.end)],
1975	)
1976	});
1977	(content_end, attrs)
1978	}
1979	}
1980
1981	/// Scanning modes for `Parser`'s `parse_line` method.
1982	#[derive(PartialEq, Eq, Copy, Clone)]
1983	enum TableParseMode {
1984	/// Inside a paragraph, scanning for table headers.
1985	Scan,
1986	/// Inside a table.
1987	Active,
1988	/// Inside a paragraph, not scanning for table headers.
1989	Disabled,
1990	}
1991
1992	/// Computes the number of header columns in a table line by computing the number of dividing pipes
1993	/// that aren't followed or preceded by whitespace.
1994	fn count_header_cols(
1995	bytes: &[u8],
1996	mut pipes: usize,
1997	mut start: usize,
1998	last_pipe_ix: usize,
1999	) -> usize {
2000	// was first pipe preceded by whitespace? if so, subtract one
2001	start += scan_whitespace_no_nl(&bytes[start..]);
2002	if bytes[start] == b'\|' {
2003	pipes -= `1`;
2004	}
2005
2006	// was last pipe followed by whitespace? if so, sub one
2007	if scan_blank_line(&bytes[(last_pipe_ix + `1`)..]).is_some() {
2008	pipes
2009	} else {
2010	pipes + `1`
2011	}
2012	}
2013
2014	/// Checks whether we should break a paragraph on the given input.
2015	///
2016	/// Use `FirstPass::scan_paragraph_interrupt` in any context that allows
2017	/// tables to interrupt the paragraph.
2018	fn scan_paragraph_interrupt_no_table(
2019	bytes: &[u8],
2020	current_container: bool,
2021	has_footnote: bool,
2022	tree: &Tree<Item>,
2023	) -> bool {
2024	scan_eol(bytes).is_some()
2025	\|\| scan_hrule(bytes).is_ok()
2026	\|\| scan_atx_heading(bytes).is_some()
2027	\|\| scan_code_fence(bytes).is_some()
2028	\|\| scan_blockquote_start(bytes).is_some()
2029	\|\| scan_listitem(bytes).map_or(`false`, \|(ix, delim, index, _)\| {
2030	! current_container \|\|
2031	tree.is_in_table() \|\|
2032	// we don't allow interruption by either empty lists or
2033	// numbered lists starting at an index other than 1
2034	(delim == b'*' \|\| delim == b'-' \|\| delim == b'+' \|\| index == `1`)
2035	&& (scan_blank_line(&bytes[ix..]).is_none())
2036	})
2037	\|\| bytes.starts_with(b"<")
2038	&& (get_html_end_tag(&bytes[`1`..]).is_some() \|\| starts_html_block_type_6(&bytes[`1`..]))
2039	\|\| (has_footnote
2040	&& bytes.starts_with(b"[^")
2041	&& scan_link_label_rest(
2042	std::str::from_utf8(&bytes[`2`..]).unwrap(),
2043	&\|_\| None,
2044	tree.is_in_table(),
2045	)
2046	.map_or(`false`, \|(len, _)\| bytes.get(`2` + len) == Some(&b':')))
2047	}
2048
2049	/// Assumes `text_bytes` is preceded by `<`.
2050	fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
2051	static BEGIN_TAGS: &[&[u8]; `4`] = &[b"pre", b"style", b"script", b"textarea"];
2052	static ST_BEGIN_TAGS: &[&[u8]; `3`] = &[b"!--", b"?", b"![CDATA["];
2053
2054	for (beg_tag, end_tag) in BEGIN_TAGS
2055	.iter()
2056	.zip(["</pre>", "</style>", "</script>", "</textarea>"].iter())
2057	{
2058	let tag_len = beg_tag.len();
2059
2060	if text_bytes.len() < tag_len {
2061	// begin tags are increasing in size
2062	break;
2063	}
2064
2065	if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
2066	continue;
2067	}
2068
2069	// Must either be the end of the line...
2070	if text_bytes.len() == tag_len {
2071	return Some(end_tag);
2072	}
2073
2074	// ...or be followed by whitespace, newline, or '>'.
2075	let s = text_bytes[tag_len];
2076	if is_ascii_whitespace(s) \|\| s == b'>' {
2077	return Some(end_tag);
2078	}
2079	}
2080
2081	for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
2082	if text_bytes.starts_with(beg_tag) {
2083	return Some(end_tag);
2084	}
2085	}
2086
2087	if text_bytes.len() > `1` && text_bytes[`0`] == b'!' && text_bytes[`1`].is_ascii_alphabetic() {
2088	Some(">")
2089	} else {
2090	None
2091	}
2092	}
2093
2094	// https://english.stackexchange.com/a/285573
2095	fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
2096	let mut list_item = tree[list_ix].child;
2097	while let Some(listitem_ix) = list_item {
2098	// first child is special, controls how we repoint list_item.child
2099	let list_item_firstborn = tree[listitem_ix].child;
2100
2101	// Check that list item has children - this is not necessarily the case!
2102	if let Some(firstborn_ix) = list_item_firstborn {
2103	if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2104	tree[listitem_ix].child = tree[firstborn_ix].child;
2105	}
2106
2107	let mut list_item_child = Some(firstborn_ix);
2108	let mut node_to_repoint = None;
2109	while let Some(child_ix) = list_item_child {
2110	// surgerize paragraphs
2111	let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2112	if let Some(child_firstborn) = tree[child_ix].child {
2113	if let Some(repoint_ix) = node_to_repoint {
2114	tree[repoint_ix].next = Some(child_firstborn);
2115	}
2116	let mut child_lastborn = child_firstborn;
2117	while let Some(lastborn_next_ix) = tree[child_lastborn].next {
2118	child_lastborn = lastborn_next_ix;
2119	}
2120	child_lastborn
2121	} else {
2122	child_ix
2123	}
2124	} else {
2125	child_ix
2126	};
2127
2128	node_to_repoint = Some(repoint_ix);
2129	tree[repoint_ix].next = tree[child_ix].next;
2130	list_item_child = tree[child_ix].next;
2131	}
2132	}
2133
2134	list_item = tree[listitem_ix].next;
2135	}
2136	}
2137
2138	/// Determines whether the delimiter run starting at given index is
2139	/// left-flanking, as defined by the commonmark spec (and isn't intraword
2140	/// for _ delims).
2141	/// suffix is &s[ix..], which is passed in as an optimization, since taking
2142	/// a string subslice is O(n).
2143	fn delim_run_can_open(
2144	s: &str,
2145	suffix: &str,
2146	run_len: usize,
2147	ix: usize,
2148	mode: TableParseMode,
2149	) -> bool {
2150	let next_char = if let Some(c) = suffix.chars().nth(run_len) {
2151	c
2152	} else {
2153	return `false`;
2154	};
2155	if next_char.is_whitespace() {
2156	return `false`;
2157	}
2158	if ix == `0` {
2159	return `true`;
2160	}
2161	if mode == TableParseMode::Active {
2162	if s[..ix].ends_with('\|') && !s[..ix].ends_with(r"\\|") {
2163	return `true`;
2164	}
2165	if next_char == '\|' {
2166	return `false`;
2167	}
2168	}
2169	let delim = suffix.chars().next().unwrap();
2170	// `` and `~~` can be intraword, `_` and `~` cannot*
2171	if delim == '*' && !is_punctuation(next_char) {
2172	return `true`;
2173	}
2174	if delim == '~' && run_len > `1` {
2175	return `true`;
2176	}
2177	let prev_char = s[..ix].chars().last().unwrap();
2178	if delim == '~' && prev_char == '~' && !is_punctuation(next_char) {
2179	return `true`;
2180	}
2181
2182	prev_char.is_whitespace()
2183	\|\| is_punctuation(prev_char) && (delim != '`\'`' \|\| ![']', ')'].contains(&prev_char))
2184	}
2185
2186	/// Determines whether the delimiter run starting at given index is
2187	/// right-flanking, as defined by the commonmark spec (and isn't intraword
2188	/// for _ delims)
2189	fn delim_run_can_close(
2190	s: &str,
2191	suffix: &str,
2192	run_len: usize,
2193	ix: usize,
2194	mode: TableParseMode,
2195	) -> bool {
2196	if ix == `0` {
2197	return `false`;
2198	}
2199	let prev_char = s[..ix].chars().last().unwrap();
2200	if prev_char.is_whitespace() {
2201	return `false`;
2202	}
2203	let next_char = if let Some(c) = suffix.chars().nth(run_len) {
2204	c
2205	} else {
2206	return `true`;
2207	};
2208	if mode == TableParseMode::Active {
2209	if s[..ix].ends_with('\|') && !s[..ix].ends_with(r"\\|") {
2210	return `false`;
2211	}
2212	if next_char == '\|' {
2213	return `true`;
2214	}
2215	}
2216	let delim = suffix.chars().next().unwrap();
2217	// `` and `~~` can be intraword, `_` and `~` cannot*
2218	if (delim == '*' \|\| (delim == '~' && run_len > `1`)) && !is_punctuation(prev_char) {
2219	return `true`;
2220	}
2221	if delim == '~' && prev_char == '~' {
2222	return `true`;
2223	}
2224
2225	next_char.is_whitespace() \|\| is_punctuation(next_char)
2226	}
2227
2228	fn create_lut(options: &Options) -> LookupTable {
2229	#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2230	{
2231	LookupTable {
2232	simd: simd::compute_lookup(options),
2233	scalar: special_bytes(options),
2234	}
2235	}
2236	#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2237	{
2238	special_bytes(options)
2239	}
2240	}
2241
2242	fn special_bytes(options: &Options) -> [bool; `256`] {
2243	let mut bytes = [`false`; `256`];
2244	let standard_bytes = [
2245	b'`\n`', b'`\r`', b'*', b'_', b'&', b'`\\`', b'[', b']', b'<', b'!', b'`',
2246	];
2247
2248	for &byte in &standard_bytes {
2249	bytes[byte as usize] = `true`;
2250	}
2251	if options.contains(Options::ENABLE_TABLES) {
2252	bytes[b'\|' as usize] = `true`;
2253	}
2254	if options.contains(Options::ENABLE_STRIKETHROUGH) {
2255	bytes[b'~' as usize] = `true`;
2256	}
2257	if options.contains(Options::ENABLE_MATH) {
2258	bytes[b'$' as usize] = `true`;
2259	bytes[b'{' as usize] = `true`;
2260	bytes[b'}' as usize] = `true`;
2261	}
2262	if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2263	for &byte in &[b'.', b'-', b'"', b'`\'`'] {
2264	bytes[byte as usize] = `true`;
2265	}
2266	}
2267
2268	bytes
2269	}
2270
2271	enum LoopInstruction<T> {
2272	/// Continue looking for more special bytes, but skip next few bytes.
2273	ContinueAndSkip(usize),
2274	/// Break looping immediately, returning with the given index and value.
2275	BreakAtWith(usize, T),
2276	}
2277
2278	#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2279	struct LookupTable {
2280	simd: [u8; `16`],
2281	scalar: [bool; `256`],
2282	}
2283
2284	#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2285	type LookupTable = [bool; `256`];
2286
2287	/// This function walks the byte slices from the given index and
2288	/// calls the callback function on all bytes (and their indices) that are in the following set:
2289	/// `` ` ``, `\`, `&`, ``, `_`, `~`, `!`, `<`, `[`, `]`, `\|`, `\r`, `\n`*
2290	/// It is guaranteed not call the callback on other bytes.
2291	/// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
2292	/// will not be called with an index that is less than `ix + n + 1`.
2293	/// When the callback returns a `BreakAtWith(end_ix, opt+val)`, no more callbacks will be
2294	/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
2295	/// If `BreakAtWith(..)` is never returned, this function will return the first
2296	/// index that is outside the byteslice bound and a `None` value.
2297	fn iterate_special_bytes<F, T>(
2298	lut: &LookupTable,
2299	bytes: &[u8],
2300	ix: usize,
2301	callback: F,
2302	) -> (usize, Option<T>)
2303	where
2304	F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2305	{
2306	#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2307	{
2308	simd::iterate_special_bytes(lut, bytes, ix, callback)
2309	}
2310	#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2311	{
2312	scalar_iterate_special_bytes(lut, bytes, ix, callback)
2313	}
2314	}
2315
2316	fn scalar_iterate_special_bytes<F, T>(
2317	lut: &[bool; `256`],
2318	bytes: &[u8],
2319	mut ix: usize,
2320	mut callback: F,
2321	) -> (usize, Option<T>)
2322	where
2323	F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2324	{
2325	while ix < bytes.len() {
2326	let b: u8 = bytes[ix];
2327	if lut[b as usize] {
2328	match callback(ix, b) {
2329	LoopInstruction::ContinueAndSkip(skip: usize) => {
2330	ix += skip;
2331	}
2332	LoopInstruction::BreakAtWith(ix: usize, val: Option) => {
2333	return (ix, val);
2334	}
2335	}
2336	}
2337	ix += `1`;
2338	}
2339
2340	(ix, None)
2341	}
2342
2343	/// Split the usual heading content range and the content inside the trailing attribute block.
2344	///
2345	/// Returns `(leading_content_len, Option<trailing_attr_block_range>)`.
2346	///
2347	/// Note that `trailing_attr_block_range` will be empty range when the block
2348	/// is `{}`, since the range is content inside the wrapping `{` and `}`.
2349	///
2350	/// The closing `}` of an attribute block can have trailing whitespaces.
2351	/// They are automatically trimmed when the attribute block is being searched.
2352	///
2353	/// However, this method does not trim the trailing whitespaces of heading content.
2354	/// It is callers' responsibility to trim them if necessary.
2355	fn extract_attribute_block_content_from_header_text(
2356	heading: &[u8],
2357	) -> (usize, Option<Range<usize>>) {
2358	let heading_len = heading.len();
2359	let mut ix = heading_len;
2360	ix -= scan_rev_while(heading, \|b\| {
2361	b == b'`\n`' \|\| b == b'`\r`' \|\| b == b' ' \|\| b == b'`\t`'
2362	});
2363	if ix == `0` {
2364	return (heading_len, None);
2365	}
2366
2367	let attr_block_close = ix - `1`;
2368	if heading.get(attr_block_close) != Some(&b'}') {
2369	// The last character is not `}`. No attribute blocks found.
2370	return (heading_len, None);
2371	}
2372	// move cursor before the closing right brace (`}`)
2373	ix -= `1`;
2374
2375	ix -= scan_rev_while(&heading[..ix], \|b\| {
2376	// Characters to be excluded:
2377	// `{` and `}`: special characters to open and close an attribute block.*
2378	// `\\`: a special character to escape many characters and disable some syntaxes.*
2379	// + Handling of this escape character differs among markdown processors.
2380	// + Escaped characters will be separate text node from neighbors, so
2381	// it is not easy to handle unescaped string and trim the trailing block.
2382	// `<` and `>`: special characters to start and end HTML tag.*
2383	// + No known processors converts `{#<i>foo</i>}` into
2384	// `id="<i>foo</>"` as of this writing, so hopefully
2385	// this restriction won't cause compatibility issues.
2386	// `\n` and `\r`: a newline character.*
2387	// + Setext heading can have multiple lines. However it is hard to support
2388	// attribute blocks that have newline inside, since the parsing proceeds line by
2389	// line and lines will be separate nodes even they are logically a single text.
2390	!matches!(b, b'{' \| b'}' \| b'<' \| b'>' \| b'`\\`' \| b'`\n`' \| b'`\r`')
2391	});
2392	if ix == `0` {
2393	// `{` is not found. No attribute blocks available.
2394	return (heading_len, None);
2395	}
2396	let attr_block_open = ix - `1`;
2397	if heading[attr_block_open] != b'{' {
2398	// `{` is not found. No attribute blocks available.
2399	return (heading_len, None);
2400	}
2401
2402	(attr_block_open, Some(ix..attr_block_close))
2403	}
2404
2405	/// Parses an attribute block content, such as `.class1 #id .class2`.
2406	///
2407	/// Returns `(id, classes)`.
2408	///
2409	/// It is callers' responsibility to find opening and closing characters of the attribute
2410	/// block. Usually [`extract_attribute_block_content_from_header_text`] function does it for you.
2411	///
2412	/// Note that this parsing requires explicit whitespace separators between
2413	/// attributes. This is intentional design with the reasons below:
2414	///
2415	/// to keep conversion simple and easy to understand for any possible input,*
2416	/// to avoid adding less obvious conversion rule that can reduce compatibility*
2417	/// with other implementations more, and
2418	/// to follow the major design of implementations with the support for the*
2419	/// attribute blocks extension (as of this writing).
2420	///
2421	/// See also: [`Options::ENABLE_HEADING_ATTRIBUTES`].
2422	///
2423	/// [`Options::ENABLE_HEADING_ATTRIBUTES`]: `crate::Options::ENABLE_HEADING_ATTRIBUTES`
2424	fn parse_inside_attribute_block(inside_attr_block: &str) -> Option<HeadingAttributes<'_>> {
2425	let mut id = None;
2426	let mut classes = Vec::new();
2427	let mut attrs = Vec::new();
2428
2429	for attr in inside_attr_block.split_ascii_whitespace() {
2430	// iterator returned by `str::split_ascii_whitespace` never emits empty
2431	// strings, so taking first byte won't panic.
2432	if attr.len() > `1` {
2433	let first_byte = attr.as_bytes()[`0`];
2434	if first_byte == b'#' {
2435	id = Some(attr[`1`..].into());
2436	} else if first_byte == b'.' {
2437	classes.push(attr[`1`..].into());
2438	} else {
2439	let split = attr.split_once('=');
2440	if let Some((key, value)) = split {
2441	attrs.push((key.into(), Some(value.into())));
2442	} else {
2443	attrs.push((attr.into(), None));
2444	}
2445	}
2446	}
2447	}
2448
2449	Some(HeadingAttributes { id, classes, attrs })
2450	}
2451
2452	#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2453	mod simd {
2454	//! SIMD byte scanning logic.
2455	//!
2456	//! This module provides functions that allow walking through byteslices, calling
2457	//! provided callback functions on special bytes and their indices using SIMD.
2458	//! The byteset is defined in `compute_lookup`.
2459	//!
2460	//! The idea is to load in a chunk of 16 bytes and perform a lookup into a set of
2461	//! bytes on all the bytes in this chunk simultaneously. We produce a 16 bit bitmask
2462	//! from this and call the callback on every index corresponding to a 1 in this mask
2463	//! before moving on to the next chunk. This allows us to move quickly when there
2464	//! are no or few matches.
2465	//!
2466	//! The table lookup is inspired by this [great overview]. However, since all of the
2467	//! bytes we're interested in are ASCII, we don't quite need the full generality of
2468	//! the universal algorithm and are hence able to skip a few instructions.
2469	//!
2470	//! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html
2471
2472	use super::{LookupTable, LoopInstruction};
2473	use crate::Options;
2474	use core::arch::x86_64::*;
2475
2476	const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
2477
2478	/// Generates a lookup table containing the bitmaps for our
2479	/// special marker bytes. This is effectively a 128 element 2d bitvector,
2480	/// that can be indexed by a four bit row index (the lower nibble)
2481	/// and a three bit column index (upper nibble).
2482	pub(super) fn compute_lookup(options: &Options) -> [u8; `16`] {
2483	let mut lookup = [`0u8`; `16`];
2484	let standard_bytes = [
2485	b'`\n`', b'`\r`', b'*', b'_', b'&', b'`\\`', b'[', b']', b'<', b'!', b'`',
2486	];
2487
2488	for &byte in &standard_bytes {
2489	add_lookup_byte(&mut lookup, byte);
2490	}
2491	if options.contains(Options::ENABLE_TABLES) {
2492	add_lookup_byte(&mut lookup, b'\|');
2493	}
2494	if options.contains(Options::ENABLE_STRIKETHROUGH) {
2495	add_lookup_byte(&mut lookup, b'~');
2496	}
2497	if options.contains(Options::ENABLE_MATH) {
2498	add_lookup_byte(&mut lookup, b'$');
2499	add_lookup_byte(&mut lookup, b'{');
2500	add_lookup_byte(&mut lookup, b'}');
2501	}
2502	if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2503	for &byte in &[b'.', b'-', b'"', b'`\'`'] {
2504	add_lookup_byte(&mut lookup, byte);
2505	}
2506	}
2507
2508	lookup
2509	}
2510
2511	fn add_lookup_byte(lookup: &mut [u8; `16`], byte: u8) {
2512	lookup[(byte & `0x0f`) as usize] \|= `1` << (byte >> `4`);
2513	}
2514
2515	/// Computes a bit mask for the given byteslice starting from the given index,
2516	/// where the 16 least significant bits indicate (by value of 1) whether or not
2517	/// there is a special character at that byte position. The least significant bit
2518	/// corresponds to `bytes[ix]` and the most significant bit corresponds to
2519	/// `bytes[ix + 15]`.
2520	/// It is only safe to call this function when `bytes.len() >= ix + VECTOR_SIZE`.
2521	#[target_feature(enable = "ssse3")]
2522	#[inline]
2523	unsafe fn compute_mask(lut: &[u8; `16`], bytes: &[u8], ix: usize) -> i32 {
2524	debug_assert!(bytes.len() >= ix + VECTOR_SIZE);
2525
2526	let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
2527	// Small lookup table to compute single bit bitshifts
2528	// for 16 bytes at once.
2529	let bitmask_lookup =
2530	_mm_setr_epi8(`1`, `2`, `4`, `8`, `16`, `32`, `64`, `-128`, `-1`, `-1`, `-1`, `-1`, `-1`, `-1`, `-1`, `-1`);
2531
2532	// Load input from memory.
2533	let raw_ptr = bytes.as_ptr().add(ix) as *const __m128i;
2534	let input = _mm_loadu_si128(raw_ptr);
2535	// Compute the bitmap using the bottom nibble as an index
2536	// into the lookup table. Note that non-ascii bytes will have
2537	// their most significant bit set and will map to lookup[0].
2538	let bitset = _mm_shuffle_epi8(bitmap, input);
2539	// Compute the high nibbles of the input using a 16-bit rightshift of four
2540	// and a mask to prevent most-significant bit issues.
2541	let higher_nibbles = _mm_and_si128(_mm_srli_epi16(input, `4`), _mm_set1_epi8(`0x0f`));
2542	// Create a bitmask for the bitmap by perform a left shift of the value
2543	// of the higher nibble. Bytes with their most significant set are mapped
2544	// to -1 (all ones).
2545	let bitmask = _mm_shuffle_epi8(bitmask_lookup, higher_nibbles);
2546	// Test the bit of the bitmap by AND'ing the bitmap and the mask together.
2547	let tmp = _mm_and_si128(bitset, bitmask);
2548	// Check whether the result was not null. NEQ is not a SIMD intrinsic,
2549	// but comparing to the bitmask is logically equivalent. This also prevents us
2550	// from matching any non-ASCII bytes since none of the bitmaps were all ones
2551	// (-1).
2552	let result = _mm_cmpeq_epi8(tmp, bitmask);
2553
2554	// Return the resulting bitmask.
2555	_mm_movemask_epi8(result)
2556	}
2557
2558	/// Calls callback on byte indices and their value.
2559	/// Breaks when callback returns LoopInstruction::BreakAtWith(ix, val). And skips the
2560	/// number of bytes in callback return value otherwise.
2561	/// Returns the final index and a possible break value.
2562	pub(super) fn iterate_special_bytes<F, T>(
2563	lut: &LookupTable,
2564	bytes: &[u8],
2565	ix: usize,
2566	callback: F,
2567	) -> (usize, Option<T>)
2568	where
2569	F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2570	{
2571	if is_x86_feature_detected!("ssse3") && bytes.len() >= VECTOR_SIZE {
2572	unsafe { simd_iterate_special_bytes(&lut.simd, bytes, ix, callback) }
2573	} else {
2574	super::scalar_iterate_special_bytes(&lut.scalar, bytes, ix, callback)
2575	}
2576	}
2577
2578	/// Calls the callback function for every 1 in the given bitmask with
2579	/// the index `offset + ix`, where `ix` is the position of the 1 in the mask.
2580	/// Returns `Ok(ix)` to continue from index `ix`, `Err((end_ix, opt_val)` to break with
2581	/// final index `end_ix` and optional value `opt_val`.
2582	unsafe fn process_mask<F, T>(
2583	mut mask: i32,
2584	bytes: &[u8],
2585	mut offset: usize,
2586	callback: &mut F,
2587	) -> Result<usize, (usize, Option<T>)>
2588	where
2589	F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2590	{
2591	while mask != `0` {
2592	let mask_ix = mask.trailing_zeros() as usize;
2593	offset += mask_ix;
2594	match callback(offset, *bytes.get_unchecked(offset)) {
2595	LoopInstruction::ContinueAndSkip(skip) => {
2596	offset += skip + `1`;
2597	mask = mask.wrapping_shr((skip + `1` + mask_ix) as u32);
2598	}
2599	LoopInstruction::BreakAtWith(ix, val) => return Err((ix, val)),
2600	}
2601	}
2602	Ok(offset)
2603	}
2604
2605	#[target_feature(enable = "ssse3")]
2606	/// Important: only call this function when `bytes.len() >= 16`. Doing
2607	/// so otherwise may exhibit undefined behaviour.
2608	unsafe fn simd_iterate_special_bytes<F, T>(
2609	lut: &[u8; `16`],
2610	bytes: &[u8],
2611	mut ix: usize,
2612	mut callback: F,
2613	) -> (usize, Option<T>)
2614	where
2615	F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2616	{
2617	debug_assert!(bytes.len() >= VECTOR_SIZE);
2618	let upperbound = bytes.len() - VECTOR_SIZE;
2619
2620	while ix < upperbound {
2621	let mask = compute_mask(lut, bytes, ix);
2622	let block_start = ix;
2623	ix = match process_mask(mask, bytes, ix, &mut callback) {
2624	Ok(ix) => std::cmp::max(ix, VECTOR_SIZE + block_start),
2625	Err((end_ix, val)) => return (end_ix, val),
2626	};
2627	}
2628
2629	if bytes.len() > ix {
2630	// shift off the bytes at start we have already scanned
2631	let mask = compute_mask(lut, bytes, upperbound) >> ix - upperbound;
2632	if let Err((end_ix, val)) = process_mask(mask, bytes, ix, &mut callback) {
2633	return (end_ix, val);
2634	}
2635	}
2636
2637	(bytes.len(), None)
2638	}
2639
2640	#[cfg(test)]
2641	mod simd_test {
2642	use super::super::create_lut;
2643	use super::{iterate_special_bytes, LoopInstruction};
2644	use crate::Options;
2645
2646	fn check_expected_indices(bytes: &[u8], expected: &[usize], skip: usize) {
2647	let mut opts = Options::empty();
2648	opts.insert(Options::ENABLE_MATH);
2649	opts.insert(Options::ENABLE_TABLES);
2650	opts.insert(Options::ENABLE_FOOTNOTES);
2651	opts.insert(Options::ENABLE_STRIKETHROUGH);
2652	opts.insert(Options::ENABLE_TASKLISTS);
2653
2654	let lut = create_lut(&opts);
2655	let mut indices = vec![];
2656
2657	iterate_special_bytes::<_, i32>(&lut, bytes, `0`, \|ix, _byte_ty\| {
2658	indices.push(ix);
2659	LoopInstruction::ContinueAndSkip(skip)
2660	});
2661
2662	assert_eq!(&indices[..], expected);
2663	}
2664
2665	#[test]
2666	fn simple_no_match() {
2667	check_expected_indices("abcdef0123456789".as_bytes(), &[], `0`);
2668	}
2669
2670	#[test]
2671	fn simple_match() {
2672	check_expected_indices("*bcd&f0123456789".as_bytes(), &[`0`, `4`], `0`);
2673	}
2674
2675	#[test]
2676	fn single_open_fish() {
2677	check_expected_indices("<".as_bytes(), &[`0`], `0`);
2678	}
2679
2680	#[test]
2681	fn long_match() {
2682	check_expected_indices("0123456789abcde~*bcd&f0".as_bytes(), &[`15`, `16`, `20`], `0`);
2683	}
2684
2685	#[test]
2686	fn border_skip() {
2687	check_expected_indices("0123456789abcde~~~~d&f0".as_bytes(), &[`15`, `20`], `3`);
2688	}
2689
2690	#[test]
2691	fn exhaustive_search() {
2692	let chars = [
2693	b'`\n`', b'`\r`', b'*', b'_', b'~', b'\|', b'&', b'`\\`', b'[', b']', b'<', b'!', b'`',
2694	b'$', b'{', b'}',
2695	];
2696
2697	for &c in &chars {
2698	for i in `0u8`..=`255` {
2699	if !chars.contains(&i) {
2700	// full match
2701	let mut buf = [i; `18`];
2702	buf[`3`] = c;
2703	buf[`6`] = c;
2704
2705	check_expected_indices(&buf[..], &[`3`, `6`], `0`);
2706	}
2707	}
2708	}
2709	}
2710	}
2711	}
2712