parse.rs source code [crates/pulldown-cmark/src/parse.rs]

1	// Copyright 2017 Google Inc. All rights reserved.
2	//
3	// Permission is hereby granted, free of charge, to any person obtaining a copy
4	// of this software and associated documentation files (the "Software"), to deal
5	// in the Software without restriction, including without limitation the rights
6	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	// copies of the Software, and to permit persons to whom the Software is
8	// furnished to do so, subject to the following conditions:
9	//
10	// The above copyright notice and this permission notice shall be included in
11	// all copies or substantial portions of the Software.
12	//
13	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	// THE SOFTWARE.
20
21	//! Tree-based two pass parser.
22
23	use std::cmp::{max, min};
24	use std::collections::{HashMap, VecDeque};
25	use std::iter::FusedIterator;
26	use std::num::NonZeroUsize;
27	use std::ops::{Index, Range};
28
29	use unicase::UniCase;
30
31	use crate::firstpass::run_first_pass;
32	use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel};
33	use crate::scanners::*;
34	use crate::strings::CowStr;
35	use crate::tree::{Tree, TreeIndex};
36	use crate::{Alignment, CodeBlockKind, Event, HeadingLevel, LinkType, Options, Tag};
37
// Allowing arbitrary depth nested parentheses inside link destinations
// can create denial of service vulnerabilities if we're not careful.
// The simplest countermeasure is to limit their depth, which is
// explicitly allowed by the spec as long as the limit is at least 3:
// https://spec.commonmark.org/0.29/#link-destination
const LINK_MAX_NESTED_PARENS: usize = 5;
44
45	#[derive(Debug, Default, Clone, Copy)]
46	pub(crate) struct Item {
47	pub start: usize,
48	pub end: usize,
49	pub body: ItemBody,
50	}
51
52	#[derive(Debug, PartialEq, Clone, Copy)]
53	pub(crate) enum ItemBody {
54	Paragraph,
55	Text,
56	SoftBreak,
57	HardBreak,
58
59	// These are possible inline items, need to be resolved in second pass.
60
61	// repeats, can_open, can_close
62	MaybeEmphasis(usize, bool, bool),
63	// quote byte, can_open, can_close
64	MaybeSmartQuote(u8, bool, bool),
65	MaybeCode(usize, bool), // number of backticks, preceded by backslash
66	MaybeHtml,
67	MaybeLinkOpen,
68	// bool indicates whether or not the preceding section could be a reference
69	MaybeLinkClose(bool),
70	MaybeImage,
71
72	// These are inline items after resolution.
73	Emphasis,
74	Strong,
75	Strikethrough,
76	Code(CowIndex),
77	Link(LinkIndex),
78	Image(LinkIndex),
79	FootnoteReference(CowIndex),
80	TaskListMarker(bool), // true for checked
81
82	Rule,
83	Heading(HeadingLevel, Option<HeadingIndex>), // heading level
84	FencedCodeBlock(CowIndex),
85	IndentCodeBlock,
86	Html,
87	OwnedHtml(CowIndex),
88	BlockQuote,
89	List(bool, u8, u64), // is_tight, list character, list start index
90	ListItem(usize), // indent level
91	SynthesizeText(CowIndex),
92	SynthesizeChar(char),
93	FootnoteDefinition(CowIndex),
94
95	// Tables
96	Table(AlignmentIndex),
97	TableHead,
98	TableRow,
99	TableCell,
100
101	// Dummy node at the top of the tree - should not be used otherwise!
102	Root,
103	}
104
105	impl<'a> ItemBody {
106	fn is_inline(&self) -> bool {
107	matches!(
108	*self,
109	ItemBody::MaybeEmphasis(..)
110	\| ItemBody::MaybeSmartQuote(..)
111	\| ItemBody::MaybeHtml
112	\| ItemBody::MaybeCode(..)
113	\| ItemBody::MaybeLinkOpen
114	\| ItemBody::MaybeLinkClose(..)
115	\| ItemBody::MaybeImage
116	)
117	}
118	}
119
120	impl<'a> Default for ItemBody {
121	fn default() -> Self {
122	ItemBody::Root
123	}
124	}
125
126	#[derive(Debug)]
127	pub struct BrokenLink<'a> {
128	pub span: std::ops::Range<usize>,
129	pub link_type: LinkType,
130	pub reference: CowStr<'a>,
131	}
132
133	/// Markdown event iterator.
134	pub struct Parser<'input, 'callback> {
135	text: &'input str,
136	options: Options,
137	tree: Tree<Item>,
138	allocs: Allocations<'input>,
139	broken_link_callback: BrokenLinkCallback<'input, 'callback>,
140	html_scan_guard: HtmlScanGuard,
141
142	// used by inline passes. store them here for reuse
143	inline_stack: InlineStack,
144	link_stack: LinkStack,
145	}
146
147	impl<'input, 'callback> std::fmt::Debug for Parser<'input, 'callback> {
148	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149	// Only print the fileds that have public types.
150	f&mut DebugStruct<'_, '_>.debug_struct("Parser")
151	.field("text", &self.text)
152	.field("options", &self.options)
153	.field(
154	name:"broken_link_callback",
155	&self.broken_link_callback.as_ref().map(\|_\| ..),
156	)
157	.finish()
158	}
159	}
160
161	impl<'input, 'callback> Parser<'input, 'callback> {
162	/// Creates a new event iterator for a markdown string without any options enabled.
163	pub fn new(text: &'input str) -> Self {
164	Parser::new_ext(text, Options::empty())
165	}
166
167	/// Creates a new event iterator for a markdown string with given options.
168	pub fn new_ext(text: &'input str, options: Options) -> Self {
169	Parser::new_with_broken_link_callback(text, options, None)
170	}
171
172	/// In case the parser encounters any potential links that have a broken
173	/// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
174	/// the provided callback will be called with the reference name,
175	/// and the returned pair will be used as the link name and title if it is not
176	/// `None`.
177	pub fn new_with_broken_link_callback(
178	text: &'input str,
179	options: Options,
180	broken_link_callback: BrokenLinkCallback<'input, 'callback>,
181	) -> Self {
182	let (mut tree, allocs) = run_first_pass(text, options);
183	tree.reset();
184	let inline_stack = Default::default();
185	let link_stack = Default::default();
186	let html_scan_guard = Default::default();
187	Parser {
188	text,
189	options,
190	tree,
191	allocs,
192	broken_link_callback,
193	inline_stack,
194	link_stack,
195	html_scan_guard,
196	}
197	}
198
199	/// Returns a reference to the internal `RefDefs` object, which provides access
200	/// to the internal map of reference definitions.
201	pub fn reference_definitions(&self) -> &RefDefs {
202	&self.allocs.refdefs
203	}
204
205	/// Handle inline markup.
206	///
207	/// When the parser encounters any item indicating potential inline markup, all
208	/// inline markup passes are run on the remainder of the chain.
209	///
210	/// Note: there's some potential for optimization here, but that's future work.
211	fn handle_inline(&mut self) {
212	self.handle_inline_pass1();
213	self.handle_emphasis();
214	}
215
216	/// Handle inline HTML, code spans, and links.
217	///
218	/// This function handles both inline HTML and code spans, because they have
219	/// the same precedence. It also handles links, even though they have lower
220	/// precedence, because the URL of links must not be processed.
221	fn handle_inline_pass1(&mut self) {
222	let mut code_delims = CodeDelims::new();
223	let mut cur = self.tree.cur();
224	let mut prev = None;
225
226	let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
227	let block_text = &self.text[..block_end];
228
229	while let Some(mut cur_ix) = cur {
230	match self.tree[cur_ix].item.body {
231	ItemBody::MaybeHtml => {
232	let next = self.tree[cur_ix].next;
233	let autolink = if let Some(next_ix) = next {
234	scan_autolink(block_text, self.tree[next_ix].item.start)
235	} else {
236	None
237	};
238
239	if let Some((ix, uri, link_type)) = autolink {
240	let node = scan_nodes_to_ix(&self.tree, next, ix);
241	let text_node = self.tree.create_node(Item {
242	start: self.tree[cur_ix].item.start + `1`,
243	end: ix - `1`,
244	body: ItemBody::Text,
245	});
246	let link_ix = self.allocs.allocate_link(link_type, uri, "".into());
247	self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
248	self.tree[cur_ix].item.end = ix;
249	self.tree[cur_ix].next = node;
250	self.tree[cur_ix].child = Some(text_node);
251	prev = cur;
252	cur = node;
253	if let Some(node_ix) = cur {
254	self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
255	}
256	continue;
257	} else {
258	let inline_html = next.and_then(\|next_ix\| {
259	self.scan_inline_html(
260	block_text.as_bytes(),
261	self.tree[next_ix].item.start,
262	)
263	});
264	if let Some((span, ix)) = inline_html {
265	let node = scan_nodes_to_ix(&self.tree, next, ix);
266	self.tree[cur_ix].item.body = if !span.is_empty() {
267	let converted_string =
268	String::from_utf8(span).expect("invalid utf8");
269	ItemBody::OwnedHtml(
270	self.allocs.allocate_cow(converted_string.into()),
271	)
272	} else {
273	ItemBody::Html
274	};
275	self.tree[cur_ix].item.end = ix;
276	self.tree[cur_ix].next = node;
277	prev = cur;
278	cur = node;
279	if let Some(node_ix) = cur {
280	self.tree[node_ix].item.start =
281	max(self.tree[node_ix].item.start, ix);
282	}
283	continue;
284	}
285	}
286	self.tree[cur_ix].item.body = ItemBody::Text;
287	}
288	ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
289	if preceded_by_backslash {
290	search_count -= `1`;
291	if search_count == `0` {
292	self.tree[cur_ix].item.body = ItemBody::Text;
293	prev = cur;
294	cur = self.tree[cur_ix].next;
295	continue;
296	}
297	}
298
299	if code_delims.is_populated() {
300	// we have previously scanned all codeblock delimiters,
301	// so we can reuse that work
302	if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
303	self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
304	} else {
305	self.tree[cur_ix].item.body = ItemBody::Text;
306	}
307	} else {
308	// we haven't previously scanned all codeblock delimiters,
309	// so walk the AST
310	let mut scan = if search_count > `0` {
311	self.tree[cur_ix].next
312	} else {
313	None
314	};
315	while let Some(scan_ix) = scan {
316	if let ItemBody::MaybeCode(delim_count, _) =
317	self.tree[scan_ix].item.body
318	{
319	if search_count == delim_count {
320	self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
321	code_delims.clear();
322	break;
323	} else {
324	code_delims.insert(delim_count, scan_ix);
325	}
326	}
327	scan = self.tree[scan_ix].next;
328	}
329	if scan == None {
330	self.tree[cur_ix].item.body = ItemBody::Text;
331	}
332	}
333	}
334	ItemBody::MaybeLinkOpen => {
335	self.tree[cur_ix].item.body = ItemBody::Text;
336	self.link_stack.push(LinkStackEl {
337	node: cur_ix,
338	ty: LinkStackTy::Link,
339	});
340	}
341	ItemBody::MaybeImage => {
342	self.tree[cur_ix].item.body = ItemBody::Text;
343	self.link_stack.push(LinkStackEl {
344	node: cur_ix,
345	ty: LinkStackTy::Image,
346	});
347	}
348	ItemBody::MaybeLinkClose(could_be_ref) => {
349	self.tree[cur_ix].item.body = ItemBody::Text;
350	if let Some(tos) = self.link_stack.pop() {
351	if tos.ty == LinkStackTy::Disabled {
352	continue;
353	}
354	let next = self.tree[cur_ix].next;
355	if let Some((next_ix, url, title)) =
356	self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
357	{
358	let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
359	if let Some(prev_ix) = prev {
360	self.tree[prev_ix].next = None;
361	}
362	cur = Some(tos.node);
363	cur_ix = tos.node;
364	let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title);
365	self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
366	ItemBody::Image(link_ix)
367	} else {
368	ItemBody::Link(link_ix)
369	};
370	self.tree[cur_ix].child = self.tree[cur_ix].next;
371	self.tree[cur_ix].next = next_node;
372	self.tree[cur_ix].item.end = next_ix;
373	if let Some(next_node_ix) = next_node {
374	self.tree[next_node_ix].item.start =
375	max(self.tree[next_node_ix].item.start, next_ix);
376	}
377
378	if tos.ty == LinkStackTy::Link {
379	self.link_stack.disable_all_links();
380	}
381	} else {
382	// ok, so its not an inline link. maybe it is a reference
383	// to a defined link?
384	let scan_result = scan_reference(
385	&self.tree,
386	block_text,
387	next,
388	self.options.contains(Options::ENABLE_FOOTNOTES),
389	);
390	let (node_after_link, link_type) = match scan_result {
391	// [label][reference]
392	RefScan::LinkLabel(_, end_ix) => {
393	// Toggle reference viability of the last closing bracket,
394	// so that we can skip it on future iterations in case
395	// it fails in this one. In particular, we won't call
396	// the broken link callback twice on one reference.
397	let reference_close_node = if let Some(node) =
398	scan_nodes_to_ix(&self.tree, next, end_ix - `1`)
399	{
400	node
401	} else {
402	continue;
403	};
404	self.tree[reference_close_node].item.body =
405	ItemBody::MaybeLinkClose(`false`);
406	let next_node = self.tree[reference_close_node].next;
407
408	(next_node, LinkType::Reference)
409	}
410	// [reference][]
411	RefScan::Collapsed(next_node) => {
412	// This reference has already been tried, and it's not
413	// valid. Skip it.
414	if !could_be_ref {
415	continue;
416	}
417	(next_node, LinkType::Collapsed)
418	}
419	// [shortcut]
420	//
421	// [shortcut]: /blah
422	RefScan::Failed => {
423	if !could_be_ref {
424	continue;
425	}
426	(next, LinkType::Shortcut)
427	}
428	};
429
430	// FIXME: references and labels are mixed in the naming of variables
431	// below. Disambiguate!
432
433	// (label, source_ix end)
434	let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
435	RefScan::LinkLabel(l, end_ix) => {
436	Some((ReferenceLabel::Link(l), end_ix))
437	}
438	RefScan::Collapsed(..) \| RefScan::Failed => {
439	// No label? maybe it is a shortcut reference
440	let label_start = self.tree[tos.node].item.end - `1`;
441	let label_end = self.tree[cur_ix].item.end;
442	scan_link_label(
443	&self.tree,
444	&self.text[label_start..label_end],
445	self.options.contains(Options::ENABLE_FOOTNOTES),
446	)
447	.map(\|(ix, label)\| (label, label_start + ix))
448	.filter(\|(_, end)\| *end == label_end)
449	}
450	};
451
452	// see if it's a footnote reference
453	if let Some((ReferenceLabel::Footnote(l), end)) = label {
454	self.tree[tos.node].next = node_after_link;
455	self.tree[tos.node].child = None;
456	self.tree[tos.node].item.body =
457	ItemBody::FootnoteReference(self.allocs.allocate_cow(l));
458	self.tree[tos.node].item.end = end;
459	prev = Some(tos.node);
460	cur = node_after_link;
461	self.link_stack.clear();
462	continue;
463	} else if let Some((ReferenceLabel::Link(link_label), end)) = label {
464	let type_url_title = self
465	.allocs
466	.refdefs
467	.get(link_label.as_ref())
468	.map(\|matching_def\| {
469	// found a matching definition!
470	let title = matching_def
471	.title
472	.as_ref()
473	.cloned()
474	.unwrap_or_else(\|\| "".into());
475	let url = matching_def.dest.clone();
476	(link_type, url, title)
477	})
478	.or_else(\|\| {
479	match self.broken_link_callback.as_mut() {
480	Some(callback) => {
481	// Construct a BrokenLink struct, which will be passed to the callback
482	let broken_link = BrokenLink {
483	span: (self.tree[tos.node].item.start)..end,
484	link_type,
485	reference: link_label,
486	};
487
488	callback(broken_link).map(\|(url, title)\| {
489	(link_type.to_unknown(), url, title)
490	})
491	}
492	None => None,
493	}
494	});
495
496	if let Some((def_link_type, url, title)) = type_url_title {
497	let link_ix =
498	self.allocs.allocate_link(def_link_type, url, title);
499	self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
500	{
501	ItemBody::Image(link_ix)
502	} else {
503	ItemBody::Link(link_ix)
504	};
505	let label_node = self.tree[tos.node].next;
506
507	// lets do some tree surgery to add the link to the tree
508	// 1st: skip the label node and close node
509	self.tree[tos.node].next = node_after_link;
510
511	// then, if it exists, add the label node as a child to the link node
512	if label_node != cur {
513	self.tree[tos.node].child = label_node;
514
515	// finally: disconnect list of children
516	if let Some(prev_ix) = prev {
517	self.tree[prev_ix].next = None;
518	}
519	}
520
521	self.tree[tos.node].item.end = end;
522
523	// set up cur so next node will be node_after_link
524	cur = Some(tos.node);
525	cur_ix = tos.node;
526
527	if tos.ty == LinkStackTy::Link {
528	self.link_stack.disable_all_links();
529	}
530	}
531	}
532	}
533	}
534	}
535	_ => (),
536	}
537	prev = cur;
538	cur = self.tree[cur_ix].next;
539	}
540	self.link_stack.clear();
541	}
542
543	fn handle_emphasis(&mut self) {
544	let mut prev = None;
545	let mut prev_ix: TreeIndex;
546	let mut cur = self.tree.cur();
547
548	let mut single_quote_open: Option<TreeIndex> = None;
549	let mut double_quote_open: bool = `false`;
550
551	while let Some(mut cur_ix) = cur {
552	match self.tree[cur_ix].item.body {
553	ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
554	let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
555	let both = can_open && can_close;
556	if can_close {
557	while let Some(el) =
558	self.inline_stack.find_match(&mut self.tree, c, count, both)
559	{
560	// have a match!
561	if let Some(prev_ix) = prev {
562	self.tree[prev_ix].next = None;
563	}
564	let match_count = min(count, el.count);
565	// start, end are tree node indices
566	let mut end = cur_ix - `1`;
567	let mut start = el.start + el.count;
568
569	// work from the inside out
570	while start > el.start + el.count - match_count {
571	let inc = if start > el.start + el.count - match_count + `1` {
572	`2`
573	} else {
574	`1`
575	};
576	let ty = if c == b'~' {
577	ItemBody::Strikethrough
578	} else if inc == `2` {
579	ItemBody::Strong
580	} else {
581	ItemBody::Emphasis
582	};
583
584	let root = start - inc;
585	end = end + inc;
586	self.tree[root].item.body = ty;
587	self.tree[root].item.end = self.tree[end].item.end;
588	self.tree[root].child = Some(start);
589	self.tree[root].next = None;
590	start = root;
591	}
592
593	// set next for top most emph level
594	prev_ix = el.start + el.count - match_count;
595	prev = Some(prev_ix);
596	cur = self.tree[cur_ix + match_count - `1`].next;
597	self.tree[prev_ix].next = cur;
598
599	if el.count > match_count {
600	self.inline_stack.push(InlineEl {
601	start: el.start,
602	count: el.count - match_count,
603	c: el.c,
604	both,
605	})
606	}
607	count -= match_count;
608	if count > `0` {
609	cur_ix = cur.unwrap();
610	} else {
611	break;
612	}
613	}
614	}
615	if count > `0` {
616	if can_open {
617	self.inline_stack.push(InlineEl {
618	start: cur_ix,
619	count,
620	c,
621	both,
622	});
623	} else {
624	for i in `0`..count {
625	self.tree[cur_ix + i].item.body = ItemBody::Text;
626	}
627	}
628	prev_ix = cur_ix + count - `1`;
629	prev = Some(prev_ix);
630	cur = self.tree[prev_ix].next;
631	}
632	}
633	ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
634	self.tree[cur_ix].item.body = match c {
635	b'`\'`' => {
636	if let (Some(open_ix), `true`) = (single_quote_open, can_close) {
637	self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
638	single_quote_open = None;
639	} else if can_open {
640	single_quote_open = Some(cur_ix);
641	}
642	ItemBody::SynthesizeChar('’')
643	}
644	_ / double quote / => {
645	if can_close && double_quote_open {
646	double_quote_open = `false`;
647	ItemBody::SynthesizeChar('”')
648	} else {
649	if can_open && !double_quote_open {
650	double_quote_open = `true`;
651	}
652	ItemBody::SynthesizeChar('“')
653	}
654	}
655	};
656	prev = cur;
657	cur = self.tree[cur_ix].next;
658	}
659	_ => {
660	prev = cur;
661	cur = self.tree[cur_ix].next;
662	}
663	}
664	}
665	self.inline_stack.pop_all(&mut self.tree);
666	}
667
668	/// Returns next byte index, url and title.
669	fn scan_inline_link(
670	&self,
671	underlying: &'input str,
672	mut ix: usize,
673	node: Option<TreeIndex>,
674	) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
675	if scan_ch(&underlying.as_bytes()[ix..], b'(') == `0` {
676	return None;
677	}
678	ix += `1`;
679	ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
680
681	let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
682	let dest = unescape(dest);
683	ix += dest_length;
684
685	ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
686
687	let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
688	ix += bytes_scanned;
689	ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace);
690	t
691	} else {
692	"".into()
693	};
694	if scan_ch(&underlying.as_bytes()[ix..], b')') == `0` {
695	return None;
696	}
697	ix += `1`;
698
699	Some((ix, dest, title))
700	}
701
702	// returns (bytes scanned, title cow)
703	fn scan_link_title(
704	&self,
705	text: &'input str,
706	start_ix: usize,
707	node: Option<TreeIndex>,
708	) -> Option<(usize, CowStr<'input>)> {
709	let bytes = text.as_bytes();
710	let open = match bytes.get(start_ix) {
711	Some(b @ b'`\'`') \| Some(b @ b'`\"`') \| Some(b @ b'(') => *b,
712	_ => return None,
713	};
714	let close = if open == b'(' { b')' } else { open };
715
716	let mut title = String::new();
717	let mut mark = start_ix + `1`;
718	let mut i = start_ix + `1`;
719
720	while i < bytes.len() {
721	let c = bytes[i];
722
723	if c == close {
724	let cow = if mark == `1` {
725	(i - start_ix + `1`, text[mark..i].into())
726	} else {
727	title.push_str(&text[mark..i]);
728	(i - start_ix + `1`, title.into())
729	};
730
731	return Some(cow);
732	}
733	if c == open {
734	return None;
735	}
736
737	if c == b'`\n`' \|\| c == b'`\r`' {
738	if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + `1`) {
739	if self.tree[node_ix].item.start > i {
740	title.push_str(&text[mark..i]);
741	title.push('`\n`');
742	i = self.tree[node_ix].item.start;
743	mark = i;
744	continue;
745	}
746	}
747	}
748	if c == b'&' {
749	if let (n, Some(value)) = scan_entity(&bytes[i..]) {
750	title.push_str(&text[mark..i]);
751	title.push_str(&value);
752	i += n;
753	mark = i;
754	continue;
755	}
756	}
757	if c == b'`\\`' && i + `1` < bytes.len() && is_ascii_punctuation(bytes[i + `1`]) {
758	title.push_str(&text[mark..i]);
759	i += `1`;
760	mark = i;
761	}
762
763	i += `1`;
764	}
765
766	None
767	}
768
769	/// Make a code span.
770	///
771	/// Both `open` and `close` are matching MaybeCode items.
772	fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
773	let first_ix = self.tree[open].next.unwrap();
774	let bytes = self.text.as_bytes();
775	let mut span_start = self.tree[open].item.end;
776	let mut span_end = self.tree[close].item.start;
777	let mut buf: Option<String> = None;
778
779	// detect all-space sequences, since they are kept as-is as of commonmark 0.29
780	if !bytes[span_start..span_end].iter().all(\|&b\| b == b' ') {
781	let opening = matches!(bytes[span_start], b' ' \| b'`\r`' \| b'`\n`');
782	let closing = matches!(bytes[span_end - `1`], b' ' \| b'`\r`' \| b'`\n`');
783	let drop_enclosing_whitespace = opening && closing;
784
785	if drop_enclosing_whitespace {
786	span_start += `1`;
787	if span_start < span_end {
788	span_end -= `1`;
789	}
790	}
791
792	let mut ix = first_ix;
793
794	while ix != close {
795	let next_ix = self.tree[ix].next.unwrap();
796	if let ItemBody::HardBreak \| ItemBody::SoftBreak = self.tree[ix].item.body {
797	if drop_enclosing_whitespace {
798	// check whether break should be ignored
799	if ix == first_ix {
800	ix = next_ix;
801	span_start = min(span_end, self.tree[ix].item.start);
802	continue;
803	} else if next_ix == close && ix > first_ix {
804	break;
805	}
806	}
807
808	let end = bytes[self.tree[ix].item.start..]
809	.iter()
810	.position(\|&b\| b == b'`\r`' \|\| b == b'`\n`')
811	.unwrap()
812	+ self.tree[ix].item.start;
813	if let Some(ref mut buf) = buf {
814	buf.push_str(&self.text[self.tree[ix].item.start..end]);
815	buf.push(' ');
816	} else {
817	let mut new_buf = String::with_capacity(span_end - span_start);
818	new_buf.push_str(&self.text[span_start..end]);
819	new_buf.push(' ');
820	buf = Some(new_buf);
821	}
822	} else if let Some(ref mut buf) = buf {
823	let end = if next_ix == close {
824	span_end
825	} else {
826	self.tree[ix].item.end
827	};
828	buf.push_str(&self.text[self.tree[ix].item.start..end]);
829	}
830	ix = next_ix;
831	}
832	}
833
834	let cow = if let Some(buf) = buf {
835	buf.into()
836	} else {
837	self.text[span_start..span_end].into()
838	};
839	if preceding_backslash {
840	self.tree[open].item.body = ItemBody::Text;
841	self.tree[open].item.end = self.tree[open].item.start + `1`;
842	self.tree[open].next = Some(close);
843	self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
844	self.tree[close].item.start = self.tree[open].item.start + `1`;
845	} else {
846	self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
847	self.tree[open].item.end = self.tree[close].item.end;
848	self.tree[open].next = self.tree[close].next;
849	}
850	}
851
852	/// On success, returns a buffer containing the inline html and byte offset.
853	/// When no bytes were skipped, the buffer will be empty and the html can be
854	/// represented as a subslice of the input string.
855	fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
856	let c = *bytes.get(ix)?;
857	if c == b'!' {
858	Some((
859	vec![],
860	scan_inline_html_comment(bytes, ix + `1`, &mut self.html_scan_guard)?,
861	))
862	} else if c == b'?' {
863	Some((
864	vec![],
865	scan_inline_html_processing(bytes, ix + `1`, &mut self.html_scan_guard)?,
866	))
867	} else {
868	let (span, i) = scan_html_block_inner(
869	// Subtract 1 to include the < character
870	&bytes[(ix - `1`)..],
871	Some(&\|bytes\| {
872	let mut line_start = LineStart::new(bytes);
873	let _ = scan_containers(&self.tree, &mut line_start);
874	line_start.bytes_scanned()
875	}),
876	)?;
877	Some((span, i + ix - `1`))
878	}
879	}
880
881	/// Consumes the event iterator and produces an iterator that produces
882	/// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
883	/// range in the markdown source.
884	pub fn into_offset_iter(self) -> OffsetIter<'input, 'callback> {
885	OffsetIter { inner: self }
886	}
887	}
888
889	/// Returns number of containers scanned.
890	pub(crate) fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize {
891	let mut i: usize = `0`;
892	for &node_ix: TreeIndex in tree.walk_spine() {
893	match tree[node_ix].item.body {
894	ItemBody::BlockQuote => {
895	// `scan_blockquote_marker` saves & restores internally
896	if !line_start.scan_blockquote_marker() {
897	break;
898	}
899	}
900	ItemBody::ListItem(indent: usize) => {
901	let save: LineStart<'_> = line_start.clone();
902	if !line_start.scan_space(n_space:indent) && !line_start.is_at_eol() {
903	*line_start = save;
904	break;
905	}
906	}
907	_ => (),
908	}
909	i += `1`;
910	}
911	i
912	}
913
914	impl<'a> Tree<Item> {
915	pub(crate) fn append_text(&mut self, start: usize, end: usize) {
916	if end > start {
917	if let Some(ix: TreeIndex) = self.cur() {
918	if ItemBody::Text == self[ix].item.body && self[ix].item.end == start {
919	self[ix].item.end = end;
920	return;
921	}
922	}
923	self.append(Item {
924	start,
925	end,
926	body: ItemBody::Text,
927	});
928	}
929	}
930	}
931
932	#[derive(Copy, Clone, Debug)]
933	struct InlineEl {
934	start: TreeIndex, // offset of tree node
935	count: usize,
936	c: u8, // b'' or b'_'*
937	both: bool, // can both open and close
938	}
939
940	#[derive(Debug, Clone, Default)]
941	struct InlineStack {
942	stack: Vec<InlineEl>,
943	// Lower bounds for matching indices in the stack. For example
944	// a strikethrough delimiter will never match with any element
945	// in the stack with index smaller than
946	// `lower_bounds[InlineStack::TILDES]`.
947	lower_bounds: [usize; `7`],
948	}
949
950	impl InlineStack {
951	/// These are indices into the lower bounds array.
952	/// Not both refers to the property that the delimiter can not both
953	/// be opener as a closer.
954	const UNDERSCORE_NOT_BOTH: usize = `0`;
955	const ASTERISK_NOT_BOTH: usize = `1`;
956	const ASTERISK_BASE: usize = `2`;
957	const TILDES: usize = `5`;
958	const UNDERSCORE_BOTH: usize = `6`;
959
960	fn pop_all(&mut self, tree: &mut Tree<Item>) {
961	for el in self.stack.drain(..) {
962	for i in `0`..el.count {
963	tree[el.start + i].item.body = ItemBody::Text;
964	}
965	}
966	self.lower_bounds = [`0`; `7`];
967	}
968
969	fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
970	if c == b'_' {
971	if both {
972	self.lower_bounds[InlineStack::UNDERSCORE_BOTH]
973	} else {
974	self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH]
975	}
976	} else if c == b'*' {
977	let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % `3`];
978	if both {
979	mod3_lower
980	} else {
981	min(
982	mod3_lower,
983	self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
984	)
985	}
986	} else {
987	self.lower_bounds[InlineStack::TILDES]
988	}
989	}
990
991	fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
992	if c == b'_' {
993	if both {
994	self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound;
995	} else {
996	self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
997	}
998	} else if c == b'*' {
999	self.lower_bounds[InlineStack::ASTERISK_BASE + count % `3`] = new_bound;
1000	if !both {
1001	self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1002	}
1003	} else {
1004	self.lower_bounds[InlineStack::TILDES] = new_bound;
1005	}
1006	}
1007
1008	fn find_match(
1009	&mut self,
1010	tree: &mut Tree<Item>,
1011	c: u8,
1012	count: usize,
1013	both: bool,
1014	) -> Option<InlineEl> {
1015	let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both));
1016	let res = self.stack[lowerbound..]
1017	.iter()
1018	.cloned()
1019	.enumerate()
1020	.rfind(\|(_, el)\| {
1021	el.c == c && (!both && !el.both \|\| (count + el.count) % `3` != `0` \|\| count % `3` == `0`)
1022	});
1023
1024	if let Some((matching_ix, matching_el)) = res {
1025	let matching_ix = matching_ix + lowerbound;
1026	for el in &self.stack[(matching_ix + `1`)..] {
1027	for i in `0`..el.count {
1028	tree[el.start + i].item.body = ItemBody::Text;
1029	}
1030	}
1031	self.stack.truncate(matching_ix);
1032	Some(matching_el)
1033	} else {
1034	self.set_lowerbound(c, count, both, self.stack.len());
1035	None
1036	}
1037	}
1038
1039	fn push(&mut self, el: InlineEl) {
1040	self.stack.push(el)
1041	}
1042	}
1043
1044	#[derive(Debug, Clone)]
1045	enum RefScan<'a> {
1046	// label, source ix of label end
1047	LinkLabel(CowStr<'a>, usize),
1048	// contains next node index
1049	Collapsed(Option<TreeIndex>),
1050	Failed,
1051	}
1052
1053	/// Skips forward within a block to a node which spans (ends inclusive) the given
1054	/// index into the source.
1055	fn scan_nodes_to_ix(
1056	tree: &Tree<Item>,
1057	mut node: Option<TreeIndex>,
1058	ix: usize,
1059	) -> Option<TreeIndex> {
1060	while let Some(node_ix: TreeIndex) = node {
1061	if tree[node_ix].item.end <= ix {
1062	node = tree[node_ix].next;
1063	} else {
1064	break;
1065	}
1066	}
1067	node
1068	}
1069
1070	/// Scans an inline link label, which cannot be interrupted.
1071	/// Returns number of bytes (including brackets) and label on success.
1072	fn scan_link_label<'text, 'tree>(
1073	tree: &'tree Tree<Item>,
1074	text: &'text str,
1075	allow_footnote_refs: bool,
1076	) -> Option<(usize, ReferenceLabel<'text>)> {
1077	let bytes: &&[u8] = &text.as_bytes();
1078	if bytes.len() < `2` \|\| bytes[`0`] != b'[' {
1079	return None;
1080	}
1081	let linebreak_handler: impl Fn(&[u8]) -> Option<…> = \|bytes: &[u8]\| {
1082	let mut line_start: LineStart<'_> = LineStart::new(bytes);
1083	let _ = scan_containers(tree, &mut line_start);
1084	Some(line_start.bytes_scanned())
1085	};
1086	let pair: (usize, ReferenceLabel<'_>) = if allow_footnote_refs && b'^' == bytes[`1`] {
1087	let (byte_index: usize, cow: CowStr<'_>) = scan_link_label_rest(&text[`2`..], &linebreak_handler)?;
1088	(byte_index + `2`, ReferenceLabel::Footnote(cow))
1089	} else {
1090	let (byte_index: usize, cow: CowStr<'_>) = scan_link_label_rest(&text[`1`..], &linebreak_handler)?;
1091	(byte_index + `1`, ReferenceLabel::Link(cow))
1092	};
1093	Some(pair)
1094	}
1095
1096	fn scan_reference<'a, 'b>(
1097	tree: &'a Tree<Item>,
1098	text: &'b str,
1099	cur: Option<TreeIndex>,
1100	allow_footnote_refs: bool,
1101	) -> RefScan<'b> {
1102	let cur_ix: TreeIndex = match cur {
1103	None => return RefScan::Failed,
1104	Some(cur_ix: TreeIndex) => cur_ix,
1105	};
1106	let start: usize = tree[cur_ix].item.start;
1107	let tail: &[u8] = &text.as_bytes()[start..];
1108
1109	if tail.starts_with(needle:b"[]") {
1110	// TODO: this unwrap is sus and should be looked at closer
1111	let closing_node: TreeIndex = tree[cur_ix].next.unwrap();
1112	RefScan::Collapsed(tree[closing_node].next)
1113	} else if let Some((ix: usize, ReferenceLabel::Link(label: CowStr<'_>))) =
1114	scan_link_label(tree, &text[start..], allow_footnote_refs)
1115	{
1116	RefScan::LinkLabel(label, start + ix)
1117	} else {
1118	RefScan::Failed
1119	}
1120	}
1121
1122	#[derive(Clone, Default)]
1123	struct LinkStack {
1124	inner: Vec<LinkStackEl>,
1125	disabled_ix: usize,
1126	}
1127
1128	impl LinkStack {
1129	fn push(&mut self, el: LinkStackEl) {
1130	self.inner.push(el);
1131	}
1132
1133	fn pop(&mut self) -> Option<LinkStackEl> {
1134	let el = self.inner.pop();
1135	self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1136	el
1137	}
1138
1139	fn clear(&mut self) {
1140	self.inner.clear();
1141	self.disabled_ix = `0`;
1142	}
1143
1144	fn disable_all_links(&mut self) {
1145	for el in &mut self.inner[self.disabled_ix..] {
1146	if el.ty == LinkStackTy::Link {
1147	el.ty = LinkStackTy::Disabled;
1148	}
1149	}
1150	self.disabled_ix = self.inner.len();
1151	}
1152	}
1153
1154	#[derive(Clone, Debug)]
1155	struct LinkStackEl {
1156	node: TreeIndex,
1157	ty: LinkStackTy,
1158	}
1159
/// Kind of a `LinkStackEl`.
#[derive(Debug, Clone, PartialEq)]
enum LinkStackTy {
    /// A link entry.
    Link,
    /// An image entry.
    Image,
    /// A former `Link` entry that `LinkStack::disable_all_links` switched off.
    Disabled,
}
1166
1167	/// Contains the destination URL, title and source span of a reference definition.
1168	#[derive(Clone, Debug)]
1169	pub struct LinkDef<'a> {
1170	pub dest: CowStr<'a>,
1171	pub title: Option<CowStr<'a>>,
1172	pub span: Range<usize>,
1173	}
1174
1175	/// Tracks tree indices of code span delimiters of each length. It should prevent
1176	/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1177	struct CodeDelims {
1178	inner: HashMap<usize, VecDeque<TreeIndex>>,
1179	seen_first: bool,
1180	}
1181
1182	impl CodeDelims {
1183	fn new() -> Self {
1184	Self {
1185	inner: Default::default(),
1186	seen_first: `false`,
1187	}
1188	}
1189
1190	fn insert(&mut self, count: usize, ix: TreeIndex) {
1191	if self.seen_first {
1192	self.inner
1193	.entry(count)
1194	.or_insert_with(Default::default)
1195	.push_back(ix);
1196	} else {
1197	// Skip the first insert, since that delimiter will always
1198	// be an opener and not a closer.
1199	self.seen_first = `true`;
1200	}
1201	}
1202
1203	fn is_populated(&self) -> bool {
1204	!self.inner.is_empty()
1205	}
1206
1207	fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1208	while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1209	if ix > open_ix {
1210	return Some(ix);
1211	}
1212	}
1213	None
1214	}
1215
1216	fn clear(&mut self) {
1217	self.inner.clear();
1218	self.seen_first = `false`;
1219	}
1220	}
1221
/// Index into the `links` storage of `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LinkIndex(usize);

/// Index into the `cows` storage of `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct CowIndex(usize);

/// Index into the `alignments` storage of `Allocations`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct AlignmentIndex(usize);

/// Index into the `headings` storage of `Allocations`, stored off by one
/// (position + 1) so that it fits a `NonZeroUsize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct HeadingIndex(NonZeroUsize);
1233
1234	#[derive(Clone)]
1235	pub(crate) struct Allocations<'a> {
1236	pub refdefs: RefDefs<'a>,
1237	links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>,
1238	cows: Vec<CowStr<'a>>,
1239	alignments: Vec<Vec<Alignment>>,
1240	headings: Vec<HeadingAttributes<'a>>,
1241	}
1242
/// Attributes attached to a heading; used by the heading attributes extension.
#[derive(Clone)]
pub(crate) struct HeadingAttributes<'a> {
    /// The heading's id, if any.
    pub id: Option<&'a str>,
    /// The heading's classes.
    pub classes: Vec<&'a str>,
}
1249
1250	/// Keeps track of the reference definitions defined in the document.
1251	#[derive(Clone, Default, Debug)]
1252	pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);
1253
1254	impl<'input, 'b, 's> RefDefs<'input>
1255	where
1256	's: 'b,
1257	{
1258	/// Performs a lookup on reference label using unicode case folding.
1259	pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
1260	self.0.get(&UniCase::new(key.into()))
1261	}
1262
1263	/// Provides an iterator over all the document's reference definitions.
1264	pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
1265	self.0.iter().map(\|(k: &UniCase>, v: &LinkDef<'_>)\| (k.as_ref(), v))
1266	}
1267	}
1268
1269	impl<'a> Allocations<'a> {
1270	pub fn new() -> Self {
1271	Self {
1272	refdefs: RefDefs::default(),
1273	links: Vec::with_capacity(`128`),
1274	cows: Vec::new(),
1275	alignments: Vec::new(),
1276	headings: Vec::new(),
1277	}
1278	}
1279
1280	pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1281	let ix = self.cows.len();
1282	self.cows.push(cow);
1283	CowIndex(ix)
1284	}
1285
1286	pub fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex {
1287	let ix = self.links.len();
1288	self.links.push((ty, url, title));
1289	LinkIndex(ix)
1290	}
1291
1292	pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1293	let ix = self.alignments.len();
1294	self.alignments.push(alignment);
1295	AlignmentIndex(ix)
1296	}
1297
1298	pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
1299	let ix = self.headings.len();
1300	self.headings.push(attrs);
1301	// This won't panic. `self.headings.len()` can't be `usize::MAX` since
1302	// such a long Vec cannot fit in memory.
1303	let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(`1`)).expect("too many headings");
1304	HeadingIndex(ix_nonzero)
1305	}
1306	}
1307
1308	impl<'a> Index<CowIndex> for Allocations<'a> {
1309	type Output = CowStr<'a>;
1310
1311	fn index(&self, ix: CowIndex) -> &Self::Output {
1312	self.cows.index(ix.0)
1313	}
1314	}
1315
1316	impl<'a> Index<LinkIndex> for Allocations<'a> {
1317	type Output = (LinkType, CowStr<'a>, CowStr<'a>);
1318
1319	fn index(&self, ix: LinkIndex) -> &Self::Output {
1320	self.links.index(ix.0)
1321	}
1322	}
1323
1324	impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1325	type Output = Vec<Alignment>;
1326
1327	fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1328	self.alignments.index(ix.0)
1329	}
1330	}
1331
1332	impl<'a> Index<HeadingIndex> for Allocations<'a> {
1333	type Output = HeadingAttributes<'a>;
1334
1335	fn index(&self, ix: HeadingIndex) -> &Self::Output {
1336	self.headings.index(ix.0.get() - `1`)
1337	}
1338	}
1339
/// A struct containing information on the reachability of certain inline HTML
/// elements. In particular, for cdata elements (`<![CDATA[`), processing
/// elements (`<?`) and declarations (`<!DECLARATION`). The respective `usize`s
/// represent the indices before which a scan will always fail and can hence
/// be skipped.
#[derive(Default, Clone)]
pub(crate) struct HtmlScanGuard {
    /// Index before which scanning for a cdata element always fails.
    pub cdata: usize,
    /// Index before which scanning for a processing element always fails.
    pub processing: usize,
    /// Index before which scanning for a declaration always fails.
    pub declaration: usize,
}
1351
1352	pub type BrokenLinkCallback<'input, 'borrow> =
1353	Option<&'borrow mut dyn FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>>;
1354
1355	/// Markdown event and source range iterator.
1356	///
1357	/// Generates tuples where the first element is the markdown event and the second
1358	/// is a the corresponding range in the source string.
1359	///
1360	/// Constructed from a `Parser` using its
1361	/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
1362	#[derive(Debug)]
1363	pub struct OffsetIter<'a, 'b> {
1364	inner: Parser<'a, 'b>,
1365	}
1366
1367	impl<'a, 'b> OffsetIter<'a, 'b> {
1368	/// Returns a reference to the internal reference definition tracker.
1369	pub fn reference_definitions(&self) -> &RefDefs {
1370	self.inner.reference_definitions()
1371	}
1372	}
1373
1374	impl<'a, 'b> Iterator for OffsetIter<'a, 'b> {
1375	type Item = (Event<'a>, Range<usize>);
1376
1377	fn next(&mut self) -> Option<Self::Item> {
1378	match self.inner.tree.cur() {
1379	None => {
1380	let ix = self.inner.tree.pop()?;
1381	let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs);
1382	self.inner.tree.next_sibling(ix);
1383	let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
1384	debug_assert!(span.start <= span.end);
1385	Some((Event::End(tag), span))
1386	}
1387	Some(cur_ix) => {
1388	if self.inner.tree[cur_ix].item.body.is_inline() {
1389	self.inner.handle_inline();
1390	}
1391
1392	let node = self.inner.tree[cur_ix];
1393	let item = node.item;
1394	let event = item_to_event(item, self.inner.text, &self.inner.allocs);
1395	if let Event::Start(..) = event {
1396	self.inner.tree.push();
1397	} else {
1398	self.inner.tree.next_sibling(cur_ix);
1399	}
1400	debug_assert!(item.start <= item.end);
1401	Some((event, item.start..item.end))
1402	}
1403	}
1404	}
1405	}
1406
1407	fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> {
1408	match item.body {
1409	ItemBody::Paragraph => Tag::Paragraph,
1410	ItemBody::Emphasis => Tag::Emphasis,
1411	ItemBody::Strong => Tag::Strong,
1412	ItemBody::Strikethrough => Tag::Strikethrough,
1413	ItemBody::Link(link_ix) => {
1414	let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
1415	Tag::Link(*link_type, url.clone(), title.clone())
1416	}
1417	ItemBody::Image(link_ix) => {
1418	let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
1419	Tag::Image(*link_type, url.clone(), title.clone())
1420	}
1421	ItemBody::Heading(level, Some(heading_ix)) => {
1422	let HeadingAttributes { id, classes } = allocs.index(heading_ix);
1423	Tag::Heading(level, *id, classes.clone())
1424	}
1425	ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()),
1426	ItemBody::FencedCodeBlock(cow_ix) => {
1427	Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
1428	}
1429	ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
1430	ItemBody::BlockQuote => Tag::BlockQuote,
1431	ItemBody::List(_, c, listitem_start) => {
1432	if c == b'.' \|\| c == b')' {
1433	Tag::List(Some(listitem_start))
1434	} else {
1435	Tag::List(None)
1436	}
1437	}
1438	ItemBody::ListItem(_) => Tag::Item,
1439	ItemBody::TableHead => Tag::TableHead,
1440	ItemBody::TableCell => Tag::TableCell,
1441	ItemBody::TableRow => Tag::TableRow,
1442	ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
1443	ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
1444	_ => panic!("unexpected item body {:?}", item.body),
1445	}
1446	}
1447
1448	fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> {
1449	let tag = match item.body {
1450	ItemBody::Text => return Event::Text(text[item.start..item.end].into()),
1451	ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()),
1452	ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()),
1453	ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
1454	ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
1455	ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs[cow_ix].clone()),
1456	ItemBody::SoftBreak => return Event::SoftBreak,
1457	ItemBody::HardBreak => return Event::HardBreak,
1458	ItemBody::FootnoteReference(cow_ix) => {
1459	return Event::FootnoteReference(allocs[cow_ix].clone())
1460	}
1461	ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
1462	ItemBody::Rule => return Event::Rule,
1463
1464	ItemBody::Paragraph => Tag::Paragraph,
1465	ItemBody::Emphasis => Tag::Emphasis,
1466	ItemBody::Strong => Tag::Strong,
1467	ItemBody::Strikethrough => Tag::Strikethrough,
1468	ItemBody::Link(link_ix) => {
1469	let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
1470	Tag::Link(*link_type, url.clone(), title.clone())
1471	}
1472	ItemBody::Image(link_ix) => {
1473	let &(ref link_type, ref url, ref title) = allocs.index(link_ix);
1474	Tag::Image(*link_type, url.clone(), title.clone())
1475	}
1476	ItemBody::Heading(level, Some(heading_ix)) => {
1477	let HeadingAttributes { id, classes } = allocs.index(heading_ix);
1478	Tag::Heading(level, *id, classes.clone())
1479	}
1480	ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()),
1481	ItemBody::FencedCodeBlock(cow_ix) => {
1482	Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone()))
1483	}
1484	ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
1485	ItemBody::BlockQuote => Tag::BlockQuote,
1486	ItemBody::List(_, c, listitem_start) => {
1487	if c == b'.' \|\| c == b')' {
1488	Tag::List(Some(listitem_start))
1489	} else {
1490	Tag::List(None)
1491	}
1492	}
1493	ItemBody::ListItem(_) => Tag::Item,
1494	ItemBody::TableHead => Tag::TableHead,
1495	ItemBody::TableCell => Tag::TableCell,
1496	ItemBody::TableRow => Tag::TableRow,
1497	ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()),
1498	ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()),
1499	_ => panic!("unexpected item body {:?}", item.body),
1500	};
1501
1502	Event::Start(tag)
1503	}
1504
1505	impl<'a, 'b> Iterator for Parser<'a, 'b> {
1506	type Item = Event<'a>;
1507
1508	fn next(&mut self) -> Option<Event<'a>> {
1509	match self.tree.cur() {
1510	None => {
1511	let ix = self.tree.pop()?;
1512	let tag = item_to_tag(&self.tree[ix].item, &self.allocs);
1513	self.tree.next_sibling(ix);
1514	Some(Event::End(tag))
1515	}
1516	Some(cur_ix) => {
1517	if self.tree[cur_ix].item.body.is_inline() {
1518	self.handle_inline();
1519	}
1520
1521	let node = self.tree[cur_ix];
1522	let item = node.item;
1523	let event = item_to_event(item, self.text, &self.allocs);
1524	if let Event::Start(..) = event {
1525	self.tree.push();
1526	} else {
1527	self.tree.next_sibling(cur_ix);
1528	}
1529	Some(event)
1530	}
1531	}
1532	}
1533	}
1534
1535	impl FusedIterator for Parser<'_, '_> {}
1536
#[cfg(test)]
mod test {
    use super::*;
    use crate::tree::Node;

    // TODO: move these tests to tests/html.rs?

    /// Builds a parser with the tables, footnotes, strikethrough and task list
    /// extensions enabled.
    fn parser_with_extensions(text: &str) -> Parser<'_, 'static> {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_TABLES);
        opts.insert(Options::ENABLE_FOOTNOTES);
        opts.insert(Options::ENABLE_STRIKETHROUGH);
        opts.insert(Options::ENABLE_TASKLISTS);

        Parser::new_ext(text, opts)
    }

    /// Guards the size of a tree node on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size() {
        let node_size = std::mem::size_of::<Node<Item>>();
        assert_eq!(48, node_size);
    }

    /// Guards the size of `ItemBody` on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn body_size() {
        let body_size = std::mem::size_of::<ItemBody>();
        assert_eq!(16, body_size);
    }

    #[test]
    fn single_open_fish_bracket() {
        // dont crash
        assert_eq!(3, Parser::new("<").count());
    }

    #[test]
    fn lone_hashtag() {
        // dont crash
        assert_eq!(2, Parser::new("#").count());
    }

    #[test]
    fn lots_of_backslashes() {
        // dont crash
        Parser::new("\\\\\r\r").count();
        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
    }

    #[test]
    fn issue_320() {
        // dont crash
        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
    }

    #[test]
    fn issue_319() {
        // dont crash
        parser_with_extensions("|\r-]([^|\r-]([^").count();
        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
    }

    #[test]
    fn issue_303() {
        // dont crash
        parser_with_extensions("[^\r\ra]").count();
        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
    }

    #[test]
    fn issue_313() {
        // dont crash
        parser_with_extensions("]0[^\r\r]0[^").count();
        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
    }

    #[test]
    fn issue_311() {
        // dont crash
        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
    }

    #[test]
    fn issue_283() {
        let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
        // dont crash
        parser_with_extensions(input).count();
    }

    #[test]
    fn issue_289() {
        // dont crash
        parser_with_extensions("> - \\\n> - ").count();
        parser_with_extensions("- \n\n").count();
    }

    #[test]
    fn issue_306() {
        // dont crash
        parser_with_extensions("\r_<__\r_<__\r_<__\r_<__").count();
    }

    #[test]
    fn issue_305() {
        // dont crash
        parser_with_extensions("_6*6_*").count();
    }

    #[test]
    fn another_emphasis_panic() {
        // dont crash
        parser_with_extensions("__#_#__").count();
    }

    /// Checks the source range reported for every event of a short paragraph.
    #[test]
    fn offset_iter() {
        // The input is 13 bytes long and contains an emphasis span, which is
        // what the expected ranges below describe: paragraph 0..13,
        // emphasis 0..7, "hello" 1..6, " world" 7..13.
        let event_offsets: Vec<_> = Parser::new("*hello* world")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
        assert_eq!(expected_offsets, event_offsets);
    }

    /// A reference link's range covers both the link text and the label.
    #[test]
    fn reference_link_offsets() {
        let range =
            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
                .into_offset_iter()
                .filter_map(|(ev, range)| match ev {
                    Event::Start(Tag::Link(LinkType::Reference, ..), ..) => Some(range),
                    _ => None,
                })
                .next()
                .unwrap();
        assert_eq!(5..30, range);
    }

    /// A footnote reference's range covers exactly `[^1]`.
    #[test]
    fn footnote_offsets() {
        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
            .into_offset_iter()
            .filter_map(|(ev, range)| match ev {
                Event::FootnoteReference(..) => Some(range),
                _ => None,
            })
            .next()
            .unwrap();
        assert_eq!(12..16, range);
    }

    /// The table's range spans from its header line to the end of the input.
    #[test]
    fn table_offset() {
        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .nth(3)
            .unwrap();
        let expected_offset = 3..59;
        assert_eq!(expected_offset, event_offset);
    }

    /// A whitespace-only table cell still gets a span covering its whitespace.
    #[test]
    fn table_cell_span() {
        // The middle cell of the body row holds two spaces, matching the
        // two-byte span asserted below.
        let markdown = "a|b|c\n--|--|--\na|  |c";
        let event_offset = parser_with_extensions(markdown)
            .into_offset_iter()
            .filter_map(|(ev, span)| match ev {
                Event::Start(Tag::TableCell) => Some(span),
                _ => None,
            })
            .nth(4)
            .unwrap();
        let expected_offset_start = "a|b|c\n--|--|--\na|".len();
        assert_eq!(
            expected_offset_start..(expected_offset_start + 2),
            event_offset
        );
    }

    #[test]
    fn offset_iter_issue_378() {
        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
        assert_eq!(expected_offsets, event_offsets);
    }

    #[test]
    fn offset_iter_issue_404() {
        let event_offsets: Vec<_> = Parser::new("###\n")
            .into_offset_iter()
            .map(|(_ev, range)| range)
            .collect();
        let expected_offsets = vec![(0..4), (0..4)];
        assert_eq!(expected_offsets, event_offsets);
    }

    // FIXME: add this one regression suite
    #[test]
    fn link_def_at_eof() {
        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    /// Without the footnote option, `[^a]` is an ordinary reference link.
    #[test]
    fn no_footnote_refs_without_option() {
        let test_str = "a [^a]\n\n[^a]: yolo";
        let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn ref_def_at_eof() {
        let test_str = "[test]:\\";
        let expected = "";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    #[test]
    fn ref_def_cr_lf() {
        let test_str = "[a]: /u\r\n\n[a]";
        let expected = "<p><a href=\"/u\">a</a></p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    /// A definition without a destination is not a definition at all.
    #[test]
    fn no_dest_refdef() {
        let test_str = "[a]:";
        let expected = "<p>[a]:</p>\n";

        let mut buf = String::new();
        crate::html::push_html(&mut buf, Parser::new(test_str));
        assert_eq!(expected, buf);
    }

    /// The broken link callback runs once per broken link, not once per
    /// attempt to resolve it.
    #[test]
    fn broken_links_called_only_once() {
        for &(markdown, expected) in &[
            ("See also [`g()`][crate::g].", 1),
            ("See also [`g()`][crate::g][].", 1),
            ("[brokenlink1] some other node [brokenlink2]", 2),
        ] {
            let mut times_called = 0;
            let callback = &mut |_broken_link: BrokenLink| {
                times_called += 1;
                None
            };
            let parser =
                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
            for _ in parser {}
            assert_eq!(times_called, expected);
        }
    }

    /// The pair returned by the callback ends up as url and title of the link.
    #[test]
    fn simple_broken_link_callback() {
        let test_str = "This is a link w/o def: [hello][world]";
        let mut callback = |broken_link: BrokenLink| {
            assert_eq!("world", broken_link.reference.as_ref());
            assert_eq!(&test_str[broken_link.span], "[hello][world]");
            let url = "YOLO".into();
            let title = "SWAG".to_owned().into();
            Some((url, title))
        };
        let parser =
            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
        let mut link_tag_count = 0;
        for (typ, url, title) in parser.filter_map(|event| match event {
            Event::Start(tag) | Event::End(tag) => match tag {
                Tag::Link(typ, url, title) => Some((typ, url, title)),
                _ => None,
            },
            _ => None,
        }) {
            link_tag_count += 1;
            assert_eq!(typ, LinkType::ReferenceUnknown);
            assert_eq!(url.as_ref(), "YOLO");
            assert_eq!(title.as_ref(), "SWAG");
        }
        assert!(link_tag_count > 0);
    }

    #[test]
    fn code_block_kind_check_fenced() {
        let parser = Parser::new("hello\n```test\ntadam\n```");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) = ev {
                assert_eq!(syntax.as_ref(), "test");
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    #[test]
    fn code_block_kind_check_indented() {
        // Four leading spaces are required for an indented code block; with
        // fewer, the backticks would open a fenced block instead.
        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
        let mut found = 0;
        for (ev, _range) in parser.into_offset_iter() {
            if let Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) = ev {
                found += 1;
            }
        }
        assert_eq!(found, 1);
    }

    /// Reference definitions are available before and during iteration, and
    /// lookups are case-insensitive.
    #[test]
    fn ref_defs() {
        let input = r###"[a B c]: http://example.com
[another]: https://google.com

text

[final ONE]: http://wikipedia.org
"###;
        let mut parser = Parser::new(input);

        assert!(parser.reference_definitions().get("a b c").is_some());
        assert!(parser.reference_definitions().get("nope").is_none());

        if let Some(_event) = parser.next() {
            // testing keys with shorter lifetimes than parser and its input
            let s = "final one".to_owned();
            let link_def = parser.reference_definitions().get(&s).unwrap();
            let span = &input[link_def.span.clone()];
            assert_eq!(span, "[final ONE]: http://wikipedia.org");
        }
    }

    /// Compile-time check that the usual ways of passing a broken link
    /// callback (closure and function) are accepted.
    #[test]
    fn common_lifetime_patterns_allowed<'b>() {
        let temporary_str = String::from("xyz");

        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
        // Hack it by attaching the lifetime to the test function instead.
        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference.into()));

        fn function<'a>(link: BrokenLink<'a>) -> Option<(CowStr<'a>, CowStr<'a>)> {
            Some(("#".into(), link.reference))
        }

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut closure),
        ) {}
        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
         * fix is simple: move it out to a function that allows annotating the lifetimes.
         */
        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
        //}

        for _ in Parser::new_with_broken_link_callback(
            "static lifetime",
            Options::empty(),
            Some(&mut function),
        ) {}
        for _ in Parser::new_with_broken_link_callback(
            &temporary_str,
            Options::empty(),
            Some(&mut function),
        ) {}
    }
}
1928