parse.rs source code [crates/pulldown_cmark/src/parse.rs]

1	// Copyright 2017 Google Inc. All rights reserved.
2	//
3	// Permission is hereby granted, free of charge, to any person obtaining a copy
4	// of this software and associated documentation files (the "Software"), to deal
5	// in the Software without restriction, including without limitation the rights
6	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	// copies of the Software, and to permit persons to whom the Software is
8	// furnished to do so, subject to the following conditions:
9	//
10	// The above copyright notice and this permission notice shall be included in
11	// all copies or substantial portions of the Software.
12	//
13	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	// THE SOFTWARE.
20
21	//! Tree-based two pass parser.
22
23	use std::cmp::{max, min};
24	use std::collections::{HashMap, VecDeque};
25	use std::iter::FusedIterator;
26	use std::num::NonZeroUsize;
27	use std::ops::{Index, Range};
28
29	use unicase::UniCase;
30
31	use crate::firstpass::run_first_pass;
32	use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
33	use crate::scanners::*;
34	use crate::strings::CowStr;
35	use crate::tree::{Tree, TreeIndex};
36	use crate::{
37	Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, LinkType, MetadataBlockKind,
38	Options, Tag, TagEnd,
39	};
40
41	// Allowing arbitrary depth nested parentheses inside link destinations
42	// can create denial of service vulnerabilities if we're not careful.
43	// The simplest countermeasure is to limit their depth, which is
44	// explicitly allowed by the spec as long as the limit is at least 3:
45	// https://spec.commonmark.org/0.29/#link-destination
46	pub(crate) const LINK_MAX_NESTED_PARENS: usize = `32`;
47
48	#[derive(Debug, Default, Clone, Copy)]
49	pub(crate) struct Item {
50	pub start: usize,
51	pub end: usize,
52	pub body: ItemBody,
53	}
54
55	#[derive(Debug, PartialEq, Clone, Copy, Default)]
56	pub(crate) enum ItemBody {
57	Paragraph,
58	Text {
59	backslash_escaped: bool,
60	},
61	SoftBreak,
62	// true = is backlash
63	HardBreak(bool),
64
65	// These are possible inline items, need to be resolved in second pass.
66
67	// repeats, can_open, can_close
68	MaybeEmphasis(usize, bool, bool),
69	// can_open, can_close, brace context
70	MaybeMath(bool, bool, u8),
71	// quote byte, can_open, can_close
72	MaybeSmartQuote(u8, bool, bool),
73	MaybeCode(usize, bool), // number of backticks, preceded by backslash
74	MaybeHtml,
75	MaybeLinkOpen,
76	// bool indicates whether or not the preceding section could be a reference
77	MaybeLinkClose(bool),
78	MaybeImage,
79
80	// These are inline items after resolution.
81	Emphasis,
82	Strong,
83	Strikethrough,
84	Math(CowIndex, bool), // true for display math
85	Code(CowIndex),
86	Link(LinkIndex),
87	Image(LinkIndex),
88	FootnoteReference(CowIndex),
89	TaskListMarker(bool), // true for checked
90
91	Rule,
92	Heading(HeadingLevel, Option<HeadingIndex>), // heading level
93	FencedCodeBlock(CowIndex),
94	IndentCodeBlock,
95	HtmlBlock,
96	InlineHtml,
97	Html,
98	OwnedHtml(CowIndex),
99	BlockQuote(Option<BlockQuoteKind>),
100	List(bool, u8, u64), // is_tight, list character, list start index
101	ListItem(usize), // indent level
102	SynthesizeText(CowIndex),
103	SynthesizeChar(char),
104	FootnoteDefinition(CowIndex),
105	MetadataBlock(MetadataBlockKind),
106
107	// Tables
108	Table(AlignmentIndex),
109	TableHead,
110	TableRow,
111	TableCell,
112
113	// Dummy node at the top of the tree - should not be used otherwise!
114	#[default]
115	Root,
116	}
117
118	impl ItemBody {
119	fn is_inline(&self) -> bool {
120	matches!(
121	*self,
122	ItemBody::MaybeEmphasis(..)
123	\| ItemBody::MaybeMath(..)
124	\| ItemBody::MaybeSmartQuote(..)
125	\| ItemBody::MaybeHtml
126	\| ItemBody::MaybeCode(..)
127	\| ItemBody::MaybeLinkOpen
128	\| ItemBody::MaybeLinkClose(..)
129	\| ItemBody::MaybeImage
130	)
131	}
132	fn is_block(&self) -> bool {
133	matches!(
134	*self,
135	ItemBody::Paragraph
136	\| ItemBody::BlockQuote(..)
137	\| ItemBody::List(..)
138	\| ItemBody::ListItem(..)
139	\| ItemBody::HtmlBlock
140	\| ItemBody::Table(..)
141	\| ItemBody::TableHead
142	\| ItemBody::TableRow
143	\| ItemBody::TableCell
144	\| ItemBody::Heading(..)
145	\| ItemBody::Rule
146	)
147	}
148	}
149
150	#[derive(Debug)]
151	pub struct BrokenLink<'a> {
152	pub span: std::ops::Range<usize>,
153	pub link_type: LinkType,
154	pub reference: CowStr<'a>,
155	}
156
157	/// Markdown event iterator.
158	pub struct Parser<'input, F = DefaultBrokenLinkCallback> {
159	text: &'input str,
160	options: Options,
161	tree: Tree<Item>,
162	allocs: Allocations<'input>,
163	broken_link_callback: Option<F>,
164	html_scan_guard: HtmlScanGuard,
165
166	// https://github.com/pulldown-cmark/pulldown-cmark/issues/844
167	// Consider this example:
168	//
169	// [x]: xxx...
170	// [x]
171	// [x]
172	// [x]
173	//
174	// Which expands to this HTML:
175	//
176	// <a href="xxx...">x</a>
177	// <a href="xxx...">x</a>
178	// <a href="xxx...">x</a>
179	//
180	// This is quadratic growth, because it's filling in the area of a square.
181	// To prevent this, track how much it's expanded and limit it.
182	link_ref_expansion_limit: usize,
183
184	// used by inline passes. store them here for reuse
185	inline_stack: InlineStack,
186	link_stack: LinkStack,
187	}
188
189	impl<'input, F> std::fmt::Debug for Parser<'input, F> {
190	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191	// Only print the fields that have public types.
192	f&mut DebugStruct<'_, '_>.debug_struct("Parser")
193	.field("text", &self.text)
194	.field("options", &self.options)
195	.field(
196	name:"broken_link_callback",
197	&self.broken_link_callback.as_ref().map(\|_\| ..),
198	)
199	.finish()
200	}
201	}
202
203	impl<'a> BrokenLink<'a> {
204	/// Moves the link into version with a static lifetime.
205	///
206	/// The `reference` member is cloned to a Boxed or Inline version.
207	pub fn into_static(self) -> BrokenLink<'static> {
208	BrokenLink {
209	span: self.span.clone(),
210	link_type: self.link_type,
211	reference: self.reference.into_string().into(),
212	}
213	}
214	}
215
216	impl<'input> Parser<'input, DefaultBrokenLinkCallback> {
217	/// Creates a new event iterator for a markdown string without any options enabled.
218	pub fn new(text: &'input str) -> Self {
219	Self::new_ext(text, Options::empty())
220	}
221
222	/// Creates a new event iterator for a markdown string with given options.
223	pub fn new_ext(text: &'input str, options: Options) -> Self {
224	Self::new_with_broken_link_callback(text, options, broken_link_callback:None)
225	}
226	}
227
228	impl<'input, F: BrokenLinkCallback<'input>> Parser<'input, F> {
229	/// In case the parser encounters any potential links that have a broken
230	/// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
231	/// the provided callback will be called with the reference name,
232	/// and the returned pair will be used as the link URL and title if it is not
233	/// `None`.
234	pub fn new_with_broken_link_callback(
235	text: &'input str,
236	options: Options,
237	broken_link_callback: Option<F>,
238	) -> Self {
239	let (mut tree, allocs) = run_first_pass(text, options);
240	tree.reset();
241	let inline_stack = Default::default();
242	let link_stack = Default::default();
243	let html_scan_guard = Default::default();
244	Parser {
245	text,
246	options,
247	tree,
248	allocs,
249	broken_link_callback,
250	inline_stack,
251	link_stack,
252	html_scan_guard,
253	// always allow 100KiB
254	link_ref_expansion_limit: text.len().max(`100_000`),
255	}
256	}
257
258	/// Returns a reference to the internal `RefDefs` object, which provides access
259	/// to the internal map of reference definitions.
260	pub fn reference_definitions(&self) -> &RefDefs<'_> {
261	&self.allocs.refdefs
262	}
263
264	/// Use a link label to fetch a type, url, and title.
265	///
266	/// This function enforces the [`link_ref_expansion_limit`].
267	/// If it returns Some, it also consumes some of the fuel.
268	/// If we're out of fuel, it immediately returns None.
269	///
270	/// The URL and title are found in the [`RefDefs`] map.
271	/// If they're not there, and a callback was provided by the user,
272	/// the [`broken_link_callback`] will be invoked and given the opportunity
273	/// to provide a fallback.
274	///
275	/// The link type (that's "link" or "image") depends on the usage site, and
276	/// is provided by the caller of this function.
277	/// This function returns a new one because, if it has to invoke a callback
278	/// to find the information, the link type is [mapped to an unknown type].
279	///
280	/// [mapped to an unknown type]: crate::LinkType::to_unknown
281	/// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
282	/// [`broken_link_callback`]: Self::broken_link_callback
283	fn fetch_link_type_url_title(
284	&mut self,
285	link_label: CowStr<'input>,
286	span: Range<usize>,
287	link_type: LinkType,
288	) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
289	if self.link_ref_expansion_limit == `0` {
290	return None;
291	}
292
293	let (link_type, url, title) = self
294	.allocs
295	.refdefs
296	.get(link_label.as_ref())
297	.map(\|matching_def\| {
298	// found a matching definition!
299	let title = matching_def
300	.title
301	.as_ref()
302	.cloned()
303	.unwrap_or_else(\|\| "".into());
304	let url = matching_def.dest.clone();
305	(link_type, url, title)
306	})
307	.or_else(\|\| {
308	match self.broken_link_callback.as_mut() {
309	Some(callback) => {
310	// Construct a BrokenLink struct, which will be passed to the callback
311	let broken_link = BrokenLink {
312	span,
313	link_type,
314	reference: link_label,
315	};
316
317	callback
318	.handle_broken_link(broken_link)
319	.map(\|(url, title)\| (link_type.to_unknown(), url, title))
320	}
321	None => None,
322	}
323	})?;
324
325	// Limit expansion from link references.
326	// This isn't a problem for footnotes, because multiple references to the same one
327	// reuse the same node, but links/images get their HREF/SRC copied.
328	self.link_ref_expansion_limit = self
329	.link_ref_expansion_limit
330	.saturating_sub(url.len() + title.len());
331
332	Some((link_type, url, title))
333	}
334
335	/// Handle inline markup.
336	///
337	/// When the parser encounters any item indicating potential inline markup, all
338	/// inline markup passes are run on the remainder of the chain.
339	///
340	/// Note: there's some potential for optimization here, but that's future work.
341	fn handle_inline(&mut self) {
342	self.handle_inline_pass1();
343	self.handle_emphasis_and_hard_break();
344	}
345
346	/// Handle inline HTML, code spans, and links.
347	///
348	/// This function handles both inline HTML and code spans, because they have
349	/// the same precedence. It also handles links, even though they have lower
350	/// precedence, because the URL of links must not be processed.
351	fn handle_inline_pass1(&mut self) {
352	let mut code_delims = CodeDelims::new();
353	let mut math_delims = MathDelims::new();
354	let mut cur = self.tree.cur();
355	let mut prev = None;
356
357	let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
358	let block_text = &self.text[..block_end];
359
360	while let Some(mut cur_ix) = cur {
361	match self.tree[cur_ix].item.body {
362	ItemBody::MaybeHtml => {
363	let next = self.tree[cur_ix].next;
364	let autolink = if let Some(next_ix) = next {
365	scan_autolink(block_text, self.tree[next_ix].item.start)
366	} else {
367	None
368	};
369
370	if let Some((ix, uri, link_type)) = autolink {
371	let node = scan_nodes_to_ix(&self.tree, next, ix);
372	let text_node = self.tree.create_node(Item {
373	start: self.tree[cur_ix].item.start + `1`,
374	end: ix - `1`,
375	body: ItemBody::Text {
376	backslash_escaped: `false`,
377	},
378	});
379	let link_ix =
380	self.allocs
381	.allocate_link(link_type, uri, "".into(), "".into());
382	self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
383	self.tree[cur_ix].item.end = ix;
384	self.tree[cur_ix].next = node;
385	self.tree[cur_ix].child = Some(text_node);
386	prev = cur;
387	cur = node;
388	if let Some(node_ix) = cur {
389	self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
390	}
391	continue;
392	} else {
393	let inline_html = next.and_then(\|next_ix\| {
394	self.scan_inline_html(
395	block_text.as_bytes(),
396	self.tree[next_ix].item.start,
397	)
398	});
399	if let Some((span, ix)) = inline_html {
400	let node = scan_nodes_to_ix(&self.tree, next, ix);
401	self.tree[cur_ix].item.body = if !span.is_empty() {
402	let converted_string =
403	String::from_utf8(span).expect("invalid utf8");
404	ItemBody::OwnedHtml(
405	self.allocs.allocate_cow(converted_string.into()),
406	)
407	} else {
408	ItemBody::InlineHtml
409	};
410	self.tree[cur_ix].item.end = ix;
411	self.tree[cur_ix].next = node;
412	prev = cur;
413	cur = node;
414	if let Some(node_ix) = cur {
415	self.tree[node_ix].item.start =
416	max(self.tree[node_ix].item.start, ix);
417	}
418	continue;
419	}
420	}
421	self.tree[cur_ix].item.body = ItemBody::Text {
422	backslash_escaped: `false`,
423	};
424	}
425	ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
426	if !can_open {
427	self.tree[cur_ix].item.body = ItemBody::Text {
428	backslash_escaped: `false`,
429	};
430	prev = cur;
431	cur = self.tree[cur_ix].next;
432	continue;
433	}
434	let is_display = self.tree[cur_ix].next.map_or(`false`, \|next_ix\| {
435	matches!(
436	self.tree[next_ix].item.body,
437	ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
438	)
439	});
440	let result = if math_delims.is_populated() {
441	// we have previously scanned all math environment delimiters,
442	// so we can reuse that work
443	math_delims.find(&self.tree, cur_ix, is_display, brace_context)
444	} else {
445	// we haven't previously scanned all math delimiters,
446	// so walk the AST
447	let mut scan = self.tree[cur_ix].next;
448	if is_display {
449	// a display delimiter, `$$`, is actually two delimiters
450	// skip the second one
451	scan = self.tree[scan.unwrap()].next;
452	}
453	let mut invalid = `false`;
454	while let Some(scan_ix) = scan {
455	if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
456	self.tree[scan_ix].item.body
457	{
458	let delim_is_display =
459	self.tree[scan_ix].next.map_or(`false`, \|next_ix\| {
460	matches!(
461	self.tree[next_ix].item.body,
462	ItemBody::MaybeMath(
463	_can_open,
464	_can_close,
465	_brace_context
466	)
467	)
468	});
469	if !invalid && delim_brace_context == brace_context {
470	if (!is_display && can_close)
471	\|\| (is_display && delim_is_display)
472	{
473	// This will skip ahead past everything we
474	// just inserted. Needed for correctness to
475	// ensure that a new scan is done after this item.
476	math_delims.clear();
477	break;
478	} else {
479	// Math cannot contain $, so the current item
480	// is invalid. Keep scanning to fill math_delims.
481	invalid = `true`;
482	}
483	}
484	math_delims.insert(
485	delim_is_display,
486	delim_brace_context,
487	scan_ix,
488	can_close,
489	);
490	}
491	if self.tree[scan_ix].item.body.is_block() {
492	// If this is a tight list, blocks and inlines might be
493	// siblings. Inlines can't cross the boundary like that.
494	scan = None;
495	break;
496	}
497	scan = self.tree[scan_ix].next;
498	}
499	scan
500	};
501
502	if let Some(scan_ix) = result {
503	self.make_math_span(cur_ix, scan_ix);
504	} else {
505	self.tree[cur_ix].item.body = ItemBody::Text {
506	backslash_escaped: `false`,
507	};
508	}
509	}
510	ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
511	if preceded_by_backslash {
512	search_count -= `1`;
513	if search_count == `0` {
514	self.tree[cur_ix].item.body = ItemBody::Text {
515	backslash_escaped: `false`,
516	};
517	prev = cur;
518	cur = self.tree[cur_ix].next;
519	continue;
520	}
521	}
522
523	if code_delims.is_populated() {
524	// we have previously scanned all codeblock delimiters,
525	// so we can reuse that work
526	if let Some(scan_ix) = code_delims.find(cur_ix, search_count) {
527	self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
528	} else {
529	self.tree[cur_ix].item.body = ItemBody::Text {
530	backslash_escaped: `false`,
531	};
532	}
533	} else {
534	// we haven't previously scanned all codeblock delimiters,
535	// so walk the AST
536	let mut scan = if search_count > `0` {
537	self.tree[cur_ix].next
538	} else {
539	None
540	};
541	while let Some(scan_ix) = scan {
542	if let ItemBody::MaybeCode(delim_count, _) =
543	self.tree[scan_ix].item.body
544	{
545	if search_count == delim_count {
546	self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
547	code_delims.clear();
548	break;
549	} else {
550	code_delims.insert(delim_count, scan_ix);
551	}
552	}
553	if self.tree[scan_ix].item.body.is_block() {
554	// If this is a tight list, blocks and inlines might be
555	// siblings. Inlines can't cross the boundary like that.
556	scan = None;
557	break;
558	}
559	scan = self.tree[scan_ix].next;
560	}
561	if scan.is_none() {
562	self.tree[cur_ix].item.body = ItemBody::Text {
563	backslash_escaped: `false`,
564	};
565	}
566	}
567	}
568	ItemBody::MaybeLinkOpen => {
569	self.tree[cur_ix].item.body = ItemBody::Text {
570	backslash_escaped: `false`,
571	};
572	self.link_stack.push(LinkStackEl {
573	node: cur_ix,
574	ty: LinkStackTy::Link,
575	});
576	}
577	ItemBody::MaybeImage => {
578	self.tree[cur_ix].item.body = ItemBody::Text {
579	backslash_escaped: `false`,
580	};
581	self.link_stack.push(LinkStackEl {
582	node: cur_ix,
583	ty: LinkStackTy::Image,
584	});
585	}
586	ItemBody::MaybeLinkClose(could_be_ref) => {
587	self.tree[cur_ix].item.body = ItemBody::Text {
588	backslash_escaped: `false`,
589	};
590	if let Some(tos) = self.link_stack.pop() {
591	if tos.ty == LinkStackTy::Disabled {
592	continue;
593	}
594	let next = self.tree[cur_ix].next;
595	if let Some((next_ix, url, title)) =
596	self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
597	{
598	let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
599	if let Some(prev_ix) = prev {
600	self.tree[prev_ix].next = None;
601	}
602	cur = Some(tos.node);
603	cur_ix = tos.node;
604	let link_ix =
605	self.allocs
606	.allocate_link(LinkType::Inline, url, title, "".into());
607	self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
608	ItemBody::Image(link_ix)
609	} else {
610	ItemBody::Link(link_ix)
611	};
612	self.tree[cur_ix].child = self.tree[cur_ix].next;
613	self.tree[cur_ix].next = next_node;
614	self.tree[cur_ix].item.end = next_ix;
615	if let Some(next_node_ix) = next_node {
616	self.tree[next_node_ix].item.start =
617	max(self.tree[next_node_ix].item.start, next_ix);
618	}
619
620	if tos.ty == LinkStackTy::Link {
621	self.link_stack.disable_all_links();
622	}
623	} else {
624	// ok, so its not an inline link. maybe it is a reference
625	// to a defined link?
626	let scan_result = scan_reference(
627	&self.tree,
628	block_text,
629	next,
630	self.options.contains(Options::ENABLE_FOOTNOTES),
631	self.options.has_gfm_footnotes(),
632	);
633	let (node_after_link, link_type) = match scan_result {
634	// [label][reference]
635	RefScan::LinkLabel(_, end_ix) => {
636	// Toggle reference viability of the last closing bracket,
637	// so that we can skip it on future iterations in case
638	// it fails in this one. In particular, we won't call
639	// the broken link callback twice on one reference.
640	let reference_close_node = if let Some(node) =
641	scan_nodes_to_ix(&self.tree, next, end_ix - `1`)
642	{
643	node
644	} else {
645	continue;
646	};
647	self.tree[reference_close_node].item.body =
648	ItemBody::MaybeLinkClose(`false`);
649	let next_node = self.tree[reference_close_node].next;
650
651	(next_node, LinkType::Reference)
652	}
653	// [reference][]
654	RefScan::Collapsed(next_node) => {
655	// This reference has already been tried, and it's not
656	// valid. Skip it.
657	if !could_be_ref {
658	continue;
659	}
660	(next_node, LinkType::Collapsed)
661	}
662	// [shortcut]
663	//
664	// [shortcut]: /blah
665	RefScan::Failed \| RefScan::UnexpectedFootnote => {
666	if !could_be_ref {
667	continue;
668	}
669	(next, LinkType::Shortcut)
670	}
671	};
672
673	// FIXME: references and labels are mixed in the naming of variables
674	// below. Disambiguate!
675
676	// (label, source_ix end)
677	let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
678	RefScan::LinkLabel(l, end_ix) => {
679	Some((ReferenceLabel::Link(l), end_ix))
680	}
681	RefScan::Collapsed(..)
682	\| RefScan::Failed
683	\| RefScan::UnexpectedFootnote => {
684	// No label? maybe it is a shortcut reference
685	let label_start = self.tree[tos.node].item.end - `1`;
686	let label_end = self.tree[cur_ix].item.end;
687	scan_link_label(
688	&self.tree,
689	&self.text[label_start..label_end],
690	self.options.contains(Options::ENABLE_FOOTNOTES),
691	self.options.has_gfm_footnotes(),
692	)
693	.map(\|(ix, label)\| (label, label_start + ix))
694	.filter(\|(_, end)\| *end == label_end)
695	}
696	};
697
698	let id = match &label {
699	Some(
700	(ReferenceLabel::Link(l), _) \| (ReferenceLabel::Footnote(l), _),
701	) => l.clone(),
702	None => "".into(),
703	};
704
705	// see if it's a footnote reference
706	if let Some((ReferenceLabel::Footnote(l), end)) = label {
707	let footref = self.allocs.allocate_cow(l);
708	if let Some(def) = self
709	.allocs
710	.footdefs
711	.get_mut(self.allocs.cows[footref.0].to_owned())
712	{
713	def.use_count += `1`;
714	}
715	if !self.options.has_gfm_footnotes()
716	\|\| self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
717	{
718	// If this came from a MaybeImage, then the `!` prefix
719	// isn't part of the footnote reference.
720	let footnote_ix = if tos.ty == LinkStackTy::Image {
721	self.tree[tos.node].next = Some(cur_ix);
722	self.tree[tos.node].child = None;
723	self.tree[tos.node].item.body =
724	ItemBody::SynthesizeChar('!');
725	self.tree[cur_ix].item.start =
726	self.tree[tos.node].item.start + `1`;
727	self.tree[tos.node].item.end =
728	self.tree[tos.node].item.start + `1`;
729	cur_ix
730	} else {
731	tos.node
732	};
733	// use `next` instead of `node_after_link` because
734	// node_after_link is calculated for a [collapsed][] link,
735	// which footnotes don't support.
736	self.tree[footnote_ix].next = next;
737	self.tree[footnote_ix].child = None;
738	self.tree[footnote_ix].item.body =
739	ItemBody::FootnoteReference(footref);
740	self.tree[footnote_ix].item.end = end;
741	prev = Some(footnote_ix);
742	cur = next;
743	self.link_stack.clear();
744	continue;
745	}
746	} else if let Some((ReferenceLabel::Link(link_label), end)) = label {
747	if let Some((def_link_type, url, title)) = self
748	.fetch_link_type_url_title(
749	link_label,
750	(self.tree[tos.node].item.start)..end,
751	link_type,
752	)
753	{
754	let link_ix =
755	self.allocs.allocate_link(def_link_type, url, title, id);
756	self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
757	{
758	ItemBody::Image(link_ix)
759	} else {
760	ItemBody::Link(link_ix)
761	};
762	let label_node = self.tree[tos.node].next;
763
764	// lets do some tree surgery to add the link to the tree
765	// 1st: skip the label node and close node
766	self.tree[tos.node].next = node_after_link;
767
768	// then, if it exists, add the label node as a child to the link node
769	if label_node != cur {
770	self.tree[tos.node].child = label_node;
771
772	// finally: disconnect list of children
773	if let Some(prev_ix) = prev {
774	self.tree[prev_ix].next = None;
775	}
776	}
777
778	self.tree[tos.node].item.end = end;
779
780	// set up cur so next node will be node_after_link
781	cur = Some(tos.node);
782	cur_ix = tos.node;
783
784	if tos.ty == LinkStackTy::Link {
785	self.link_stack.disable_all_links();
786	}
787	}
788	}
789	}
790	}
791	}
792	_ => {
793	// If this is a tight list, blocks and inlines might be
794	// siblings. Inlines can't cross the boundary like that.
795	if let Some(cur_ix) = cur {
796	if self.tree[cur_ix].item.body.is_block() {
797	self.link_stack.clear();
798	}
799	}
800	}
801	}
802	prev = cur;
803	cur = self.tree[cur_ix].next;
804	}
805	self.link_stack.clear();
806	}
807
808	fn handle_emphasis_and_hard_break(&mut self) {
809	let mut prev = None;
810	let mut prev_ix: TreeIndex;
811	let mut cur = self.tree.cur();
812
813	let mut single_quote_open: Option<TreeIndex> = None;
814	let mut double_quote_open: bool = `false`;
815
816	while let Some(mut cur_ix) = cur {
817	match self.tree[cur_ix].item.body {
818	ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
819	let run_length = count;
820	let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
821	let both = can_open && can_close;
822	if can_close {
823	while let Some(el) =
824	self.inline_stack
825	.find_match(&mut self.tree, c, run_length, both)
826	{
827	// have a match!
828	if let Some(prev_ix) = prev {
829	self.tree[prev_ix].next = None;
830	}
831	let match_count = min(count, el.count);
832	// start, end are tree node indices
833	let mut end = cur_ix - `1`;
834	let mut start = el.start + el.count;
835
836	// work from the inside out
837	while start > el.start + el.count - match_count {
838	let inc = if start > el.start + el.count - match_count + `1` {
839	`2`
840	} else {
841	`1`
842	};
843	let ty = if c == b'~' {
844	ItemBody::Strikethrough
845	} else if inc == `2` {
846	ItemBody::Strong
847	} else {
848	ItemBody::Emphasis
849	};
850
851	let root = start - inc;
852	end = end + inc;
853	self.tree[root].item.body = ty;
854	self.tree[root].item.end = self.tree[end].item.end;
855	self.tree[root].child = Some(start);
856	self.tree[root].next = None;
857	start = root;
858	}
859
860	// set next for top most emph level
861	prev_ix = el.start + el.count - match_count;
862	prev = Some(prev_ix);
863	cur = self.tree[cur_ix + match_count - `1`].next;
864	self.tree[prev_ix].next = cur;
865
866	if el.count > match_count {
867	self.inline_stack.push(InlineEl {
868	start: el.start,
869	count: el.count - match_count,
870	run_length: el.run_length,
871	c: el.c,
872	both: el.both,
873	})
874	}
875	count -= match_count;
876	if count > `0` {
877	cur_ix = cur.unwrap();
878	} else {
879	break;
880	}
881	}
882	}
883	if count > `0` {
884	if can_open {
885	self.inline_stack.push(InlineEl {
886	start: cur_ix,
887	run_length,
888	count,
889	c,
890	both,
891	});
892	} else {
893	for i in `0`..count {
894	self.tree[cur_ix + i].item.body = ItemBody::Text {
895	backslash_escaped: `false`,
896	};
897	}
898	}
899	prev_ix = cur_ix + count - `1`;
900	prev = Some(prev_ix);
901	cur = self.tree[prev_ix].next;
902	}
903	}
904	ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
905	self.tree[cur_ix].item.body = match c {
906	b'`\'`' => {
907	if let (Some(open_ix), `true`) = (single_quote_open, can_close) {
908	self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
909	single_quote_open = None;
910	} else if can_open {
911	single_quote_open = Some(cur_ix);
912	}
913	ItemBody::SynthesizeChar('’')
914	}
915	_ / double quote / => {
916	if can_close && double_quote_open {
917	double_quote_open = `false`;
918	ItemBody::SynthesizeChar('”')
919	} else {
920	if can_open && !double_quote_open {
921	double_quote_open = `true`;
922	}
923	ItemBody::SynthesizeChar('“')
924	}
925	}
926	};
927	prev = cur;
928	cur = self.tree[cur_ix].next;
929	}
930	ItemBody::HardBreak(`true`) => {
931	if self.tree[cur_ix].next.is_none() {
932	self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('`\\`');
933	}
934	prev = cur;
935	cur = self.tree[cur_ix].next;
936	}
937	_ => {
938	prev = cur;
939	// If this is a tight list, blocks and inlines might be
940	// siblings. Inlines can't cross the boundary like that.
941	if let Some(cur_ix) = cur {
942	if self.tree[cur_ix].item.body.is_block() {
943	self.inline_stack.pop_all(&mut self.tree);
944	}
945	}
946	cur = self.tree[cur_ix].next;
947	}
948	}
949	}
950	self.inline_stack.pop_all(&mut self.tree);
951	}
952
953	/// Returns next byte index, url and title.
954	fn scan_inline_link(
955	&self,
956	underlying: &'input str,
957	mut ix: usize,
958	node: Option<TreeIndex>,
959	) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
960	if scan_ch(&underlying.as_bytes()[ix..], b'(') == `0` {
961	return None;
962	}
963	ix += `1`;
964
965	let scan_separator = \|ix: &mut usize\| {
966	ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace_no_nl);
967	if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
968	*ix += bl;
969	let mut line_start = LineStart::new(&underlying.as_bytes()[*ix..]);
970	let _ = scan_containers(
971	&self.tree,
972	&mut line_start,
973	self.options.has_gfm_footnotes(),
974	);
975	*ix += line_start.bytes_scanned();
976	}
977	ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace_no_nl);
978	};
979
980	scan_separator(&mut ix);
981
982	let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
983	let dest = unescape(dest, self.tree.is_in_table());
984	ix += dest_length;
985
986	scan_separator(&mut ix);
987
988	let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
989	ix += bytes_scanned;
990	scan_separator(&mut ix);
991	t
992	} else {
993	"".into()
994	};
995	if scan_ch(&underlying.as_bytes()[ix..], b')') == `0` {
996	return None;
997	}
998	ix += `1`;
999
1000	Some((ix, dest, title))
1001	}
1002
1003	// returns (bytes scanned, title cow)
1004	fn scan_link_title(
1005	&self,
1006	text: &'input str,
1007	start_ix: usize,
1008	node: Option<TreeIndex>,
1009	) -> Option<(usize, CowStr<'input>)> {
1010	let bytes = text.as_bytes();
1011	let open = match bytes.get(start_ix) {
1012	Some(b @ b'`\'`') \| Some(b @ b'`\"`') \| Some(b @ b'(') => *b,
1013	_ => return None,
1014	};
1015	let close = if open == b'(' { b')' } else { open };
1016
1017	let mut title = String::new();
1018	let mut mark = start_ix + `1`;
1019	let mut i = start_ix + `1`;
1020
1021	while i < bytes.len() {
1022	let c = bytes[i];
1023
1024	if c == close {
1025	let cow = if mark == `1` {
1026	(i - start_ix + `1`, text[mark..i].into())
1027	} else {
1028	title.push_str(&text[mark..i]);
1029	(i - start_ix + `1`, title.into())
1030	};
1031
1032	return Some(cow);
1033	}
1034	if c == open {
1035	return None;
1036	}
1037
1038	if c == b'`\n`' \|\| c == b'`\r`' {
1039	if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + `1`) {
1040	if self.tree[node_ix].item.start > i {
1041	title.push_str(&text[mark..i]);
1042	title.push('`\n`');
1043	i = self.tree[node_ix].item.start;
1044	mark = i;
1045	continue;
1046	}
1047	}
1048	}
1049	if c == b'&' {
1050	if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1051	title.push_str(&text[mark..i]);
1052	title.push_str(&value);
1053	i += n;
1054	mark = i;
1055	continue;
1056	}
1057	}
1058	if self.tree.is_in_table()
1059	&& c == b'`\\`'
1060	&& i + `2` < bytes.len()
1061	&& bytes[i + `1`] == b'`\\`'
1062	&& bytes[i + `2`] == b'\|'
1063	{
1064	// this runs if there are an even number of pipes in a table
1065	// if it's odd, then it gets parsed as normal
1066	title.push_str(&text[mark..i]);
1067	i += `2`;
1068	mark = i;
1069	}
1070	if c == b'`\\`' && i + `1` < bytes.len() && is_ascii_punctuation(bytes[i + `1`]) {
1071	title.push_str(&text[mark..i]);
1072	i += `1`;
1073	mark = i;
1074	}
1075
1076	i += `1`;
1077	}
1078
1079	None
1080	}
1081
1082	fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
1083	let start_is_display = self.tree[open].next.filter(\|&next_ix\| {
1084	next_ix != close
1085	&& matches!(
1086	self.tree[next_ix].item.body,
1087	ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1088	)
1089	});
1090	let end_is_display = self.tree[close].next.filter(\|&next_ix\| {
1091	matches!(
1092	self.tree[next_ix].item.body,
1093	ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1094	)
1095	});
1096	let is_display = start_is_display.is_some() && end_is_display.is_some();
1097	if is_display {
1098	// This unwrap() can't panic, because if the next variable were None, end_is_display would be None
1099	close = self.tree[close].next.unwrap();
1100	self.tree[open].next = Some(close);
1101	self.tree[open].item.end += `1`;
1102	self.tree[close].item.start -= `1`;
1103	} else {
1104	if self.tree[open].item.end == self.tree[close].item.start {
1105	// inline math spans cannot be empty
1106	self.tree[open].item.body = ItemBody::Text {
1107	backslash_escaped: `false`,
1108	};
1109	return;
1110	}
1111	self.tree[open].next = Some(close);
1112	}
1113	let span_start = self.tree[open].item.end;
1114	let span_end = self.tree[close].item.start;
1115
1116	let bytes = self.text.as_bytes();
1117	let mut buf: Option<String> = None;
1118
1119	let mut start_ix = span_start;
1120	let mut ix = span_start;
1121	while ix < span_end {
1122	let c = bytes[ix];
1123	if c == b'`\r`' \|\| c == b'`\n`' {
1124	ix += `1`;
1125	let buf = buf.get_or_insert_with(\|\| String::with_capacity(ix - span_start));
1126	buf.push_str(&self.text[start_ix..ix]);
1127	let mut line_start = LineStart::new(&bytes[ix..]);
1128	let _ = scan_containers(
1129	&self.tree,
1130	&mut line_start,
1131	self.options.has_gfm_footnotes(),
1132	);
1133	ix += line_start.bytes_scanned();
1134	start_ix = ix;
1135	} else if c == b'`\\`' && bytes.get(ix + `1`) == Some(&b'\|') && self.tree.is_in_table() {
1136	let buf = buf.get_or_insert_with(\|\| String::with_capacity(ix + `1` - span_start));
1137	buf.push_str(&self.text[start_ix..ix]);
1138	buf.push('\|');
1139	ix += `2`;
1140	start_ix = ix;
1141	} else {
1142	ix += `1`;
1143	}
1144	}
1145
1146	let cow = if let Some(mut buf) = buf {
1147	buf.push_str(&self.text[start_ix..span_end]);
1148	buf.into()
1149	} else {
1150	self.text[span_start..span_end].into()
1151	};
1152
1153	self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
1154	self.tree[open].item.end = self.tree[close].item.end;
1155	self.tree[open].next = self.tree[close].next;
1156	}
1157
1158	/// Make a code span.
1159	///
1160	/// Both `open` and `close` are matching MaybeCode items.
1161	fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1162	let bytes = self.text.as_bytes();
1163	let span_start = self.tree[open].item.end;
1164	let span_end = self.tree[close].item.start;
1165	let mut buf: Option<String> = None;
1166
1167	let mut start_ix = span_start;
1168	let mut ix = span_start;
1169	while ix < span_end {
1170	let c = bytes[ix];
1171	if c == b'`\r`' \|\| c == b'`\n`' {
1172	let buf = buf.get_or_insert_with(\|\| String::with_capacity(ix + `1` - span_start));
1173	buf.push_str(&self.text[start_ix..ix]);
1174	buf.push(' ');
1175	ix += `1`;
1176	let mut line_start = LineStart::new(&bytes[ix..]);
1177	let _ = scan_containers(
1178	&self.tree,
1179	&mut line_start,
1180	self.options.has_gfm_footnotes(),
1181	);
1182	ix += line_start.bytes_scanned();
1183	start_ix = ix;
1184	} else if c == b'`\\`' && bytes.get(ix + `1`) == Some(&b'\|') && self.tree.is_in_table() {
1185	let buf = buf.get_or_insert_with(\|\| String::with_capacity(ix + `1` - span_start));
1186	buf.push_str(&self.text[start_ix..ix]);
1187	buf.push('\|');
1188	ix += `2`;
1189	start_ix = ix;
1190	} else {
1191	ix += `1`;
1192	}
1193	}
1194
1195	let (opening, closing, all_spaces) = {
1196	let s = if let Some(buf) = &mut buf {
1197	buf.push_str(&self.text[start_ix..span_end]);
1198	&buf[..]
1199	} else {
1200	&self.text[span_start..span_end]
1201	};
1202	(
1203	s.as_bytes().first() == Some(&b' '),
1204	s.as_bytes().last() == Some(&b' '),
1205	s.bytes().all(\|b\| b == b' '),
1206	)
1207	};
1208
1209	let cow: CowStr<'input> = if !all_spaces && opening && closing {
1210	if let Some(mut buf) = buf {
1211	buf.remove(`0`);
1212	buf.pop();
1213	buf.into()
1214	} else {
1215	let lo = span_start + `1`;
1216	let hi = (span_end - `1`).max(lo);
1217	self.text[lo..hi].into()
1218	}
1219	} else if let Some(buf) = buf {
1220	buf.into()
1221	} else {
1222	self.text[span_start..span_end].into()
1223	};
1224
1225	if preceding_backslash {
1226	self.tree[open].item.body = ItemBody::Text {
1227	backslash_escaped: `true`,
1228	};
1229	self.tree[open].item.end = self.tree[open].item.start + `1`;
1230	self.tree[open].next = Some(close);
1231	self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1232	self.tree[close].item.start = self.tree[open].item.start + `1`;
1233	} else {
1234	self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1235	self.tree[open].item.end = self.tree[close].item.end;
1236	self.tree[open].next = self.tree[close].next;
1237	}
1238	}
1239
1240	/// On success, returns a buffer containing the inline html and byte offset.
1241	/// When no bytes were skipped, the buffer will be empty and the html can be
1242	/// represented as a subslice of the input string.
1243	fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1244	let c = *bytes.get(ix)?;
1245	if c == b'!' {
1246	Some((
1247	vec![],
1248	scan_inline_html_comment(bytes, ix + `1`, &mut self.html_scan_guard)?,
1249	))
1250	} else if c == b'?' {
1251	Some((
1252	vec![],
1253	scan_inline_html_processing(bytes, ix + `1`, &mut self.html_scan_guard)?,
1254	))
1255	} else {
1256	let (span, i) = scan_html_block_inner(
1257	// Subtract 1 to include the < character
1258	&bytes[(ix - `1`)..],
1259	Some(&\|bytes\| {
1260	let mut line_start = LineStart::new(bytes);
1261	let _ = scan_containers(
1262	&self.tree,
1263	&mut line_start,
1264	self.options.has_gfm_footnotes(),
1265	);
1266	line_start.bytes_scanned()
1267	}),
1268	)?;
1269	Some((span, i + ix - `1`))
1270	}
1271	}
1272
1273	/// Consumes the event iterator and produces an iterator that produces
1274	/// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
1275	/// range in the markdown source.
1276	pub fn into_offset_iter(self) -> OffsetIter<'input, F> {
1277	OffsetIter { inner: self }
1278	}
1279	}
1280
1281	/// Returns number of containers scanned.
1282	pub(crate) fn scan_containers(
1283	tree: &Tree<Item>,
1284	line_start: &mut LineStart<'_>,
1285	gfm_footnotes: bool,
1286	) -> usize {
1287	let mut i = `0`;
1288	for &node_ix in tree.walk_spine() {
1289	match tree[node_ix].item.body {
1290	ItemBody::BlockQuote(..) => {
1291	// `scan_blockquote_marker` saves & restores internally
1292	if !line_start.scan_blockquote_marker() {
1293	break;
1294	}
1295	}
1296	ItemBody::ListItem(indent) => {
1297	let save = line_start.clone();
1298	if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1299	*line_start = save;
1300	break;
1301	}
1302	}
1303	ItemBody::FootnoteDefinition(..) if gfm_footnotes => {
1304	let save = line_start.clone();
1305	if !line_start.scan_space(`4`) && !line_start.is_at_eol() {
1306	*line_start = save;
1307	break;
1308	}
1309	}
1310	_ => (),
1311	}
1312	i += `1`;
1313	}
1314	i
1315	}
1316
1317	impl Tree<Item> {
1318	pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1319	if end > start {
1320	if let Some(ix) = self.cur() {
1321	if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1322	{
1323	self[ix].item.end = end;
1324	return;
1325	}
1326	}
1327	self.append(Item {
1328	start,
1329	end,
1330	body: ItemBody::Text { backslash_escaped },
1331	});
1332	}
1333	}
1334	/// Returns true if the current node is inside a table.
1335	///
1336	/// If `cur` is an ItemBody::Table, it would return false,
1337	/// but since the `TableRow` and `TableHead` and `TableCell`
1338	/// are children of the table, anything doing inline parsing
1339	/// doesn't need to care about that.
1340	pub(crate) fn is_in_table(&self) -> bool {
1341	fn might_be_in_table(item: &Item) -> bool {
1342	item.body.is_inline()
1343	\|\| matches!(item.body, \|ItemBody::TableHead\| ItemBody::TableRow
1344	\| ItemBody::TableCell)
1345	}
1346	for &ix in self.walk_spine().rev() {
1347	if matches!(self[ix].item.body, ItemBody::Table(_)) {
1348	return `true`;
1349	}
1350	if !might_be_in_table(&self[ix].item) {
1351	return `false`;
1352	}
1353	}
1354	`false`
1355	}
1356	}
1357
1358	#[derive(Copy, Clone, Debug)]
1359	struct InlineEl {
1360	/// offset of tree node
1361	start: TreeIndex,
1362	/// number of delimiters available for matching
1363	count: usize,
1364	/// length of the run that these delimiters came from
1365	run_length: usize,
1366	/// b'', b'_', or b'~'*
1367	c: u8,
1368	/// can both open and close
1369	both: bool,
1370	}
1371
1372	#[derive(Debug, Clone, Default)]
1373	struct InlineStack {
1374	stack: Vec<InlineEl>,
1375	// Lower bounds for matching indices in the stack. For example
1376	// a strikethrough delimiter will never match with any element
1377	// in the stack with index smaller than
1378	// `lower_bounds[InlineStack::TILDES]`.
1379	lower_bounds: [usize; `9`],
1380	}
1381
1382	impl InlineStack {
1383	/// These are indices into the lower bounds array.
1384	/// Not both refers to the property that the delimiter can not both
1385	/// be opener as a closer.
1386	const UNDERSCORE_NOT_BOTH: usize = `0`;
1387	const ASTERISK_NOT_BOTH: usize = `1`;
1388	const ASTERISK_BASE: usize = `2`;
1389	const TILDES: usize = `5`;
1390	const UNDERSCORE_BASE: usize = `6`;
1391
1392	fn pop_all(&mut self, tree: &mut Tree<Item>) {
1393	for el in self.stack.drain(..) {
1394	for i in `0`..el.count {
1395	tree[el.start + i].item.body = ItemBody::Text {
1396	backslash_escaped: `false`,
1397	};
1398	}
1399	}
1400	self.lower_bounds = [`0`; `9`];
1401	}
1402
1403	fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1404	if c == b'_' {
1405	let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % `3`];
1406	if both {
1407	mod3_lower
1408	} else {
1409	min(
1410	mod3_lower,
1411	self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1412	)
1413	}
1414	} else if c == b'*' {
1415	let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % `3`];
1416	if both {
1417	mod3_lower
1418	} else {
1419	min(
1420	mod3_lower,
1421	self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1422	)
1423	}
1424	} else {
1425	self.lower_bounds[InlineStack::TILDES]
1426	}
1427	}
1428
1429	fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1430	if c == b'_' {
1431	if both {
1432	self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % `3`] = new_bound;
1433	} else {
1434	self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1435	}
1436	} else if c == b'*' {
1437	self.lower_bounds[InlineStack::ASTERISK_BASE + count % `3`] = new_bound;
1438	if !both {
1439	self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1440	}
1441	} else {
1442	self.lower_bounds[InlineStack::TILDES] = new_bound;
1443	}
1444	}
1445
1446	fn truncate(&mut self, new_bound: usize) {
1447	self.stack.truncate(new_bound);
1448	for lower_bound in &mut self.lower_bounds {
1449	if *lower_bound > new_bound {
1450	*lower_bound = new_bound;
1451	}
1452	}
1453	}
1454
1455	fn find_match(
1456	&mut self,
1457	tree: &mut Tree<Item>,
1458	c: u8,
1459	run_length: usize,
1460	both: bool,
1461	) -> Option<InlineEl> {
1462	let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1463	let res = self.stack[lowerbound..]
1464	.iter()
1465	.cloned()
1466	.enumerate()
1467	.rfind(\|(_, el)\| {
1468	if c == b'~' && run_length != el.run_length {
1469	return `false`;
1470	}
1471	el.c == c
1472	&& (!both && !el.both
1473	\|\| (run_length + el.run_length) % `3` != `0`
1474	\|\| run_length % `3` == `0`)
1475	});
1476
1477	if let Some((matching_ix, matching_el)) = res {
1478	let matching_ix = matching_ix + lowerbound;
1479	for el in &self.stack[(matching_ix + `1`)..] {
1480	for i in `0`..el.count {
1481	tree[el.start + i].item.body = ItemBody::Text {
1482	backslash_escaped: `false`,
1483	};
1484	}
1485	}
1486	self.truncate(matching_ix);
1487	Some(matching_el)
1488	} else {
1489	self.set_lowerbound(c, run_length, both, self.stack.len());
1490	None
1491	}
1492	}
1493
1494	fn trim_lower_bound(&mut self, ix: usize) {
1495	self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1496	}
1497
1498	fn push(&mut self, el: InlineEl) {
1499	if el.c == b'~' {
1500	self.trim_lower_bound(InlineStack::TILDES);
1501	}
1502	self.stack.push(el)
1503	}
1504	}
1505
1506	#[derive(Debug, Clone)]
1507	enum RefScan<'a> {
1508	// label, source ix of label end
1509	LinkLabel(CowStr<'a>, usize),
1510	// contains next node index
1511	Collapsed(Option<TreeIndex>),
1512	UnexpectedFootnote,
1513	Failed,
1514	}
1515
1516	/// Skips forward within a block to a node which spans (ends inclusive) the given
1517	/// index into the source.
1518	fn scan_nodes_to_ix(
1519	tree: &Tree<Item>,
1520	mut node: Option<TreeIndex>,
1521	ix: usize,
1522	) -> Option<TreeIndex> {
1523	while let Some(node_ix: TreeIndex) = node {
1524	if tree[node_ix].item.end <= ix {
1525	node = tree[node_ix].next;
1526	} else {
1527	break;
1528	}
1529	}
1530	node
1531	}
1532
1533	/// Scans an inline link label, which cannot be interrupted.
1534	/// Returns number of bytes (including brackets) and label on success.
1535	fn scan_link_label<'text>(
1536	tree: &Tree<Item>,
1537	text: &'text str,
1538	allow_footnote_refs: bool,
1539	gfm_footnotes: bool,
1540	) -> Option<(usize, ReferenceLabel<'text>)> {
1541	let bytes = text.as_bytes();
1542	if bytes.len() < `2` \|\| bytes[`0`] != b'[' {
1543	return None;
1544	}
1545	let linebreak_handler = \|bytes: &[u8]\| {
1546	let mut line_start = LineStart::new(bytes);
1547	let _ = scan_containers(tree, &mut line_start, gfm_footnotes);
1548	Some(line_start.bytes_scanned())
1549	};
1550	if allow_footnote_refs && b'^' == bytes[`1`] && bytes.get(`2`) != Some(&b']') {
1551	let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if gfm_footnotes {
1552	&\|_\| None
1553	} else {
1554	&linebreak_handler
1555	};
1556	if let Some((byte_index, cow)) =
1557	scan_link_label_rest(&text[`2`..], linebreak_handler, tree.is_in_table())
1558	{
1559	return Some((byte_index + `2`, ReferenceLabel::Footnote(cow)));
1560	}
1561	}
1562	let (byte_index, cow) =
1563	scan_link_label_rest(&text[`1`..], &linebreak_handler, tree.is_in_table())?;
1564	Some((byte_index + `1`, ReferenceLabel::Link(cow)))
1565	}
1566
1567	fn scan_reference<'b>(
1568	tree: &Tree<Item>,
1569	text: &'b str,
1570	cur: Option<TreeIndex>,
1571	allow_footnote_refs: bool,
1572	gfm_footnotes: bool,
1573	) -> RefScan<'b> {
1574	let cur_ix: TreeIndex = match cur {
1575	None => return RefScan::Failed,
1576	Some(cur_ix: TreeIndex) => cur_ix,
1577	};
1578	let start: usize = tree[cur_ix].item.start;
1579	let tail: &[u8] = &text.as_bytes()[start..];
1580
1581	if tail.starts_with(needle:b"[]") {
1582	// TODO: this unwrap is sus and should be looked at closer
1583	let closing_node: TreeIndex = tree[cur_ix].next.unwrap();
1584	RefScan::Collapsed(tree[closing_node].next)
1585	} else {
1586	let label: Option<(usize, ReferenceLabel<'_>)> = scan_link_label(tree, &text[start..], allow_footnote_refs, gfm_footnotes);
1587	match label {
1588	Some((ix: usize, ReferenceLabel::Link(label: CowStr<'_>))) => RefScan::LinkLabel(label, start + ix),
1589	Some((_ix: usize, ReferenceLabel::Footnote(_label: CowStr<'_>))) => RefScan::UnexpectedFootnote,
1590	None => RefScan::Failed,
1591	}
1592	}
1593	}
1594
1595	#[derive(Clone, Default)]
1596	struct LinkStack {
1597	inner: Vec<LinkStackEl>,
1598	disabled_ix: usize,
1599	}
1600
1601	impl LinkStack {
1602	fn push(&mut self, el: LinkStackEl) {
1603	self.inner.push(el);
1604	}
1605
1606	fn pop(&mut self) -> Option<LinkStackEl> {
1607	let el = self.inner.pop();
1608	self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1609	el
1610	}
1611
1612	fn clear(&mut self) {
1613	self.inner.clear();
1614	self.disabled_ix = `0`;
1615	}
1616
1617	fn disable_all_links(&mut self) {
1618	for el in &mut self.inner[self.disabled_ix..] {
1619	if el.ty == LinkStackTy::Link {
1620	el.ty = LinkStackTy::Disabled;
1621	}
1622	}
1623	self.disabled_ix = self.inner.len();
1624	}
1625	}
1626
1627	#[derive(Clone, Debug)]
1628	struct LinkStackEl {
1629	node: TreeIndex,
1630	ty: LinkStackTy,
1631	}
1632
1633	#[derive(PartialEq, Clone, Debug)]
1634	enum LinkStackTy {
1635	Link,
1636	Image,
1637	Disabled,
1638	}
1639
1640	/// Contains the destination URL, title and source span of a reference definition.
1641	#[derive(Clone, Debug)]
1642	pub struct LinkDef<'a> {
1643	pub dest: CowStr<'a>,
1644	pub title: Option<CowStr<'a>>,
1645	pub span: Range<usize>,
1646	}
1647
1648	/// Contains the destination URL, title and source span of a reference definition.
1649	#[derive(Clone, Debug)]
1650	pub struct FootnoteDef {
1651	pub use_count: usize,
1652	}
1653
1654	/// Tracks tree indices of code span delimiters of each length. It should prevent
1655	/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1656	struct CodeDelims {
1657	inner: HashMap<usize, VecDeque<TreeIndex>>,
1658	seen_first: bool,
1659	}
1660
1661	impl CodeDelims {
1662	fn new() -> Self {
1663	Self {
1664	inner: Default::default(),
1665	seen_first: `false`,
1666	}
1667	}
1668
1669	fn insert(&mut self, count: usize, ix: TreeIndex) {
1670	if self.seen_first {
1671	self.inner.entry(count).or_default().push_back(ix);
1672	} else {
1673	// Skip the first insert, since that delimiter will always
1674	// be an opener and not a closer.
1675	self.seen_first = `true`;
1676	}
1677	}
1678
1679	fn is_populated(&self) -> bool {
1680	!self.inner.is_empty()
1681	}
1682
1683	fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1684	while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1685	if ix > open_ix {
1686	return Some(ix);
1687	}
1688	}
1689	None
1690	}
1691
1692	fn clear(&mut self) {
1693	self.inner.clear();
1694	self.seen_first = `false`;
1695	}
1696	}
1697
1698	/// Tracks brace contexts and delimiter length for math delimiters.
1699	/// Provides amortized constant-time lookups.
1700	struct MathDelims {
1701	inner: HashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
1702	}
1703
1704	impl MathDelims {
1705	fn new() -> Self {
1706	Self {
1707	inner: Default::default(),
1708	}
1709	}
1710
1711	fn insert(
1712	&mut self,
1713	delim_is_display: bool,
1714	brace_context: u8,
1715	ix: TreeIndex,
1716	can_close: bool,
1717	) {
1718	self.inner
1719	.entry(brace_context)
1720	.or_default()
1721	.push_back((ix, can_close, delim_is_display));
1722	}
1723
1724	fn is_populated(&self) -> bool {
1725	!self.inner.is_empty()
1726	}
1727
1728	fn find(
1729	&mut self,
1730	tree: &Tree<Item>,
1731	open_ix: TreeIndex,
1732	is_display: bool,
1733	brace_context: u8,
1734	) -> Option<TreeIndex> {
1735	while let Some((ix, can_close, delim_is_display)) =
1736	self.inner.get_mut(&brace_context)?.pop_front()
1737	{
1738	if ix <= open_ix \|\| (is_display && tree[open_ix].next == Some(ix)) {
1739	continue;
1740	}
1741	let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
1742	if (!is_display && can_close) \|\| (is_display && delim_is_display) {
1743	return Some(ix);
1744	}
1745	// if we can't use it, leave it in the queue as a tombstone for the next
1746	// thing that tries to match it
1747	self.inner
1748	.get_mut(&brace_context)?
1749	.push_front((ix, can_close, delim_is_display));
1750	break;
1751	}
1752	None
1753	}
1754
1755	fn clear(&mut self) {
1756	self.inner.clear();
1757	}
1758	}
1759
1760	#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1761	pub(crate) struct LinkIndex(usize);
1762
1763	#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1764	pub(crate) struct CowIndex(usize);
1765
1766	#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1767	pub(crate) struct AlignmentIndex(usize);
1768
1769	#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1770	pub(crate) struct HeadingIndex(NonZeroUsize);
1771
1772	#[derive(Clone)]
1773	pub(crate) struct Allocations<'a> {
1774	pub refdefs: RefDefs<'a>,
1775	pub footdefs: FootnoteDefs<'a>,
1776	links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
1777	cows: Vec<CowStr<'a>>,
1778	alignments: Vec<Vec<Alignment>>,
1779	headings: Vec<HeadingAttributes<'a>>,
1780	}
1781
1782	/// Used by the heading attributes extension.
1783	#[derive(Clone)]
1784	pub(crate) struct HeadingAttributes<'a> {
1785	pub id: Option<CowStr<'a>>,
1786	pub classes: Vec<CowStr<'a>>,
1787	pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
1788	}
1789
1790	/// Keeps track of the reference definitions defined in the document.
1791	#[derive(Clone, Default, Debug)]
1792	pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);
1793
1794	/// Keeps track of the footnote definitions defined in the document.
1795	#[derive(Clone, Default, Debug)]
1796	pub struct FootnoteDefs<'input>(pub(crate) HashMap<FootnoteLabel<'input>, FootnoteDef>);
1797
1798	impl<'input, 'b, 's> RefDefs<'input>
1799	where
1800	's: 'b,
1801	{
1802	/// Performs a lookup on reference label using unicode case folding.
1803	pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
1804	self.0.get(&UniCase::new(key.into()))
1805	}
1806
1807	/// Provides an iterator over all the document's reference definitions.
1808	pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
1809	self.0.iter().map(\|(k: &UniCase>, v: &LinkDef<'_>)\| (k.as_ref(), v))
1810	}
1811	}
1812
1813	impl<'input, 'b, 's> FootnoteDefs<'input>
1814	where
1815	's: 'b,
1816	{
1817	/// Performs a lookup on reference label using unicode case folding.
1818	pub fn contains(&'s self, key: &'b str) -> bool {
1819	self.0.contains_key(&UniCase::new(key.into()))
1820	}
1821	/// Performs a lookup on reference label using unicode case folding.
1822	pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
1823	self.0.get_mut(&UniCase::new(key))
1824	}
1825	}
1826
1827	impl<'a> Allocations<'a> {
1828	pub fn new() -> Self {
1829	Self {
1830	refdefs: RefDefs::default(),
1831	footdefs: FootnoteDefs::default(),
1832	links: Vec::with_capacity(`128`),
1833	cows: Vec::new(),
1834	alignments: Vec::new(),
1835	headings: Vec::new(),
1836	}
1837	}
1838
1839	pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1840	let ix = self.cows.len();
1841	self.cows.push(cow);
1842	CowIndex(ix)
1843	}
1844
1845	pub fn allocate_link(
1846	&mut self,
1847	ty: LinkType,
1848	url: CowStr<'a>,
1849	title: CowStr<'a>,
1850	id: CowStr<'a>,
1851	) -> LinkIndex {
1852	let ix = self.links.len();
1853	self.links.push((ty, url, title, id));
1854	LinkIndex(ix)
1855	}
1856
1857	pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1858	let ix = self.alignments.len();
1859	self.alignments.push(alignment);
1860	AlignmentIndex(ix)
1861	}
1862
1863	pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
1864	let ix = self.headings.len();
1865	self.headings.push(attrs);
1866	// This won't panic. `self.headings.len()` can't be `usize::MAX` since
1867	// such a long Vec cannot fit in memory.
1868	let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(`1`)).expect("too many headings");
1869	HeadingIndex(ix_nonzero)
1870	}
1871
1872	pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
1873	std::mem::replace(&mut self.cows[ix.0], "".into())
1874	}
1875
1876	pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
1877	let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
1878	std::mem::replace(&mut self.links[ix.0], default_link)
1879	}
1880
1881	pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
1882	std::mem::take(&mut self.alignments[ix.0])
1883	}
1884	}
1885
1886	impl<'a> Index<CowIndex> for Allocations<'a> {
1887	type Output = CowStr<'a>;
1888
1889	fn index(&self, ix: CowIndex) -> &Self::Output {
1890	self.cows.index(ix.0)
1891	}
1892	}
1893
1894	impl<'a> Index<LinkIndex> for Allocations<'a> {
1895	type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
1896
1897	fn index(&self, ix: LinkIndex) -> &Self::Output {
1898	self.links.index(ix.0)
1899	}
1900	}
1901
1902	impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1903	type Output = Vec<Alignment>;
1904
1905	fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1906	self.alignments.index(ix.0)
1907	}
1908	}
1909
1910	impl<'a> Index<HeadingIndex> for Allocations<'a> {
1911	type Output = HeadingAttributes<'a>;
1912
1913	fn index(&self, ix: HeadingIndex) -> &Self::Output {
1914	self.headings.index(ix.0.get() - `1`)
1915	}
1916	}
1917
1918	/// A struct containing information on the reachability of certain inline HTML
1919	/// elements. In particular, for cdata elements (`<![CDATA[`), processing
1920	/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
1921	/// represent the indices before which a scan will always fail and can hence
1922	/// be skipped.
1923	#[derive(Clone, Default)]
1924	pub(crate) struct HtmlScanGuard {
1925	pub cdata: usize,
1926	pub processing: usize,
1927	pub declaration: usize,
1928	pub comment: usize,
1929	}
1930
1931	/// Trait for broken link callbacks.
1932	///
1933	/// See [Parser::new_with_broken_link_callback].
1934	/// Automatically implemented for closures with the appropriate signature.
1935	pub trait BrokenLinkCallback<'input> {
1936	fn handle_broken_link(
1937	&mut self,
1938	link: BrokenLink<'input>,
1939	) -> Option<(CowStr<'input>, CowStr<'input>)>;
1940	}
1941
1942	impl<'input, T> BrokenLinkCallback<'input> for T
1943	where
1944	T: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
1945	{
1946	fn handle_broken_link(
1947	&mut self,
1948	link: BrokenLink<'input>,
1949	) -> Option<(CowStr<'input>, CowStr<'input>)> {
1950	self(link)
1951	}
1952	}
1953
1954	impl<'input> BrokenLinkCallback<'input> for Box<dyn BrokenLinkCallback<'input>> {
1955	fn handle_broken_link(
1956	&mut self,
1957	link: BrokenLink<'input>,
1958	) -> Option<(CowStr<'input>, CowStr<'input>)> {
1959	(**self).handle_broken_link(link)
1960	}
1961	}
1962
1963	/// Broken link callback that does nothing.
1964	#[derive(Debug)]
1965	pub struct DefaultBrokenLinkCallback;
1966
1967	impl<'input> BrokenLinkCallback<'input> for DefaultBrokenLinkCallback {
1968	fn handle_broken_link(
1969	&mut self,
1970	_link: BrokenLink<'input>,
1971	) -> Option<(CowStr<'input>, CowStr<'input>)> {
1972	None
1973	}
1974	}
1975
1976	/// Markdown event and source range iterator.
1977	///
1978	/// Generates tuples where the first element is the markdown event and the second
1979	/// is a the corresponding range in the source string.
1980	///
1981	/// Constructed from a `Parser` using its
1982	/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
1983	#[derive(Debug)]
1984	pub struct OffsetIter<'a, F = DefaultBrokenLinkCallback> {
1985	inner: Parser<'a, F>,
1986	}
1987
1988	impl<'a, F: BrokenLinkCallback<'a>> OffsetIter<'a, F> {
1989	/// Returns a reference to the internal reference definition tracker.
1990	pub fn reference_definitions(&self) -> &RefDefs<'_> {
1991	self.inner.reference_definitions()
1992	}
1993	}
1994
1995	impl<'a, F: BrokenLinkCallback<'a>> Iterator for OffsetIter<'a, F> {
1996	type Item = (Event<'a>, Range<usize>);
1997
1998	fn next(&mut self) -> Option<Self::Item> {
1999	match self.inner.tree.cur() {
2000	None => {
2001	let ix = self.inner.tree.pop()?;
2002	let tag_end = body_to_tag_end(&self.inner.tree[ix].item.body);
2003	self.inner.tree.next_sibling(ix);
2004	let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
2005	debug_assert!(span.start <= span.end);
2006	Some((Event::End(tag_end), span))
2007	}
2008	Some(cur_ix) => {
2009	if self.inner.tree[cur_ix].item.body.is_inline() {
2010	self.inner.handle_inline();
2011	}
2012
2013	let node = self.inner.tree[cur_ix];
2014	let item = node.item;
2015	let event = item_to_event(item, self.inner.text, &mut self.inner.allocs);
2016	if let Event::Start(..) = event {
2017	self.inner.tree.push();
2018	} else {
2019	self.inner.tree.next_sibling(cur_ix);
2020	}
2021	debug_assert!(item.start <= item.end);
2022	Some((event, item.start..item.end))
2023	}
2024	}
2025	}
2026	}
2027
2028	fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2029	match *body {
2030	ItemBody::Paragraph => TagEnd::Paragraph,
2031	ItemBody::Emphasis => TagEnd::Emphasis,
2032	ItemBody::Strong => TagEnd::Strong,
2033	ItemBody::Strikethrough => TagEnd::Strikethrough,
2034	ItemBody::Link(..) => TagEnd::Link,
2035	ItemBody::Image(..) => TagEnd::Image,
2036	ItemBody::Heading(level, _) => TagEnd::Heading(level),
2037	ItemBody::IndentCodeBlock \| ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2038	ItemBody::BlockQuote(..) => TagEnd::BlockQuote,
2039	ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2040	ItemBody::List(_, c, _) => {
2041	let is_ordered = c == b'.' \|\| c == b')';
2042	TagEnd::List(is_ordered)
2043	}
2044	ItemBody::ListItem(_) => TagEnd::Item,
2045	ItemBody::TableHead => TagEnd::TableHead,
2046	ItemBody::TableCell => TagEnd::TableCell,
2047	ItemBody::TableRow => TagEnd::TableRow,
2048	ItemBody::Table(..) => TagEnd::Table,
2049	ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2050	ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2051	_ => panic!("unexpected item body {:?}", body),
2052	}
2053	}
2054
2055	fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2056	let tag = match item.body {
2057	ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2058	ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2059	ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2060	ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2061	ItemBody::HtmlBlock => Tag::HtmlBlock,
2062	ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2063	ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2064	ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs.take_cow(cow_ix)),
2065	ItemBody::SoftBreak => return Event::SoftBreak,
2066	ItemBody::HardBreak(_) => return Event::HardBreak,
2067	ItemBody::FootnoteReference(cow_ix) => {
2068	return Event::FootnoteReference(allocs.take_cow(cow_ix))
2069	}
2070	ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2071	ItemBody::Rule => return Event::Rule,
2072	ItemBody::Paragraph => Tag::Paragraph,
2073	ItemBody::Emphasis => Tag::Emphasis,
2074	ItemBody::Strong => Tag::Strong,
2075	ItemBody::Strikethrough => Tag::Strikethrough,
2076	ItemBody::Link(link_ix) => {
2077	let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2078	Tag::Link {
2079	link_type,
2080	dest_url,
2081	title,
2082	id,
2083	}
2084	}
2085	ItemBody::Image(link_ix) => {
2086	let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2087	Tag::Image {
2088	link_type,
2089	dest_url,
2090	title,
2091	id,
2092	}
2093	}
2094	ItemBody::Heading(level, Some(heading_ix)) => {
2095	let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2096	Tag::Heading {
2097	level,
2098	id: id.clone(),
2099	classes: classes.clone(),
2100	attrs: attrs.clone(),
2101	}
2102	}
2103	ItemBody::Heading(level, None) => Tag::Heading {
2104	level,
2105	id: None,
2106	classes: Vec::new(),
2107	attrs: Vec::new(),
2108	},
2109	ItemBody::FencedCodeBlock(cow_ix) => {
2110	Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2111	}
2112	ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2113	ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2114	ItemBody::List(_, c, listitem_start) => {
2115	if c == b'.' \|\| c == b')' {
2116	Tag::List(Some(listitem_start))
2117	} else {
2118	Tag::List(None)
2119	}
2120	}
2121	ItemBody::ListItem(_) => Tag::Item,
2122	ItemBody::TableHead => Tag::TableHead,
2123	ItemBody::TableCell => Tag::TableCell,
2124	ItemBody::TableRow => Tag::TableRow,
2125	ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2126	ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2127	ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2128	ItemBody::Math(cow_ix, is_display) => {
2129	return if is_display {
2130	Event::DisplayMath(allocs.take_cow(cow_ix))
2131	} else {
2132	Event::InlineMath(allocs.take_cow(cow_ix))
2133	}
2134	}
2135	_ => panic!("unexpected item body {:?}", item.body),
2136	};
2137
2138	Event::Start(tag)
2139	}
2140
2141	impl<'a, F: BrokenLinkCallback<'a>> Iterator for Parser<'a, F> {
2142	type Item = Event<'a>;
2143
2144	fn next(&mut self) -> Option<Event<'a>> {
2145	match self.tree.cur() {
2146	None => {
2147	let ix = self.tree.pop()?;
2148	let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2149	self.tree.next_sibling(ix);
2150	Some(Event::End(tag_end))
2151	}
2152	Some(cur_ix) => {
2153	if self.tree[cur_ix].item.body.is_inline() {
2154	self.handle_inline();
2155	}
2156
2157	let node = self.tree[cur_ix];
2158	let item = node.item;
2159	let event = item_to_event(item, self.text, &mut self.allocs);
2160	if let Event::Start(ref _tag) = event {
2161	self.tree.push();
2162	} else {
2163	self.tree.next_sibling(cur_ix);
2164	}
2165	Some(event)
2166	}
2167	}
2168	}
2169	}
2170
2171	impl<'a, F: BrokenLinkCallback<'a>> FusedIterator for Parser<'a, F> {}
2172
2173	#[cfg(test)]
2174	mod test {
2175	use super::*;
2176	use crate::tree::Node;
2177
2178	// TODO: move these tests to tests/html.rs?
2179
2180	fn parser_with_extensions(text: &str) -> Parser<'_> {
2181	let mut opts = Options::empty();
2182	opts.insert(Options::ENABLE_TABLES);
2183	opts.insert(Options::ENABLE_FOOTNOTES);
2184	opts.insert(Options::ENABLE_STRIKETHROUGH);
2185	opts.insert(Options::ENABLE_TASKLISTS);
2186
2187	Parser::new_ext(text, opts)
2188	}
2189
2190	#[test]
2191	#[cfg(target_pointer_width = "64")]
2192	fn node_size() {
2193	let node_size = std::mem::size_of::<Node<Item>>();
2194	assert_eq!(`48`, node_size);
2195	}
2196
2197	#[test]
2198	#[cfg(target_pointer_width = "64")]
2199	fn body_size() {
2200	let body_size = std::mem::size_of::<ItemBody>();
2201	assert_eq!(`16`, body_size);
2202	}
2203
2204	#[test]
2205	fn single_open_fish_bracket() {
2206	// dont crash
2207	assert_eq!(`3`, Parser::new("<").count());
2208	}
2209
2210	#[test]
2211	fn lone_hashtag() {
2212	// dont crash
2213	assert_eq!(`2`, Parser::new("#").count());
2214	}
2215
2216	#[test]
2217	fn lots_of_backslashes() {
2218	// dont crash
2219	Parser::new("`\\\\\r\r`").count();
2220	Parser::new("`\\\r\r\\`.`\\\\\r\r\\`.`\\`").count();
2221	}
2222
2223	#[test]
2224	fn issue_320() {
2225	// dont crash
2226	parser_with_extensions(":`\r\t`> \|`\r`:`\r\t`> \|`\r`").count();
2227	}
2228
2229	#[test]
2230	fn issue_319() {
2231	// dont crash
2232	parser_with_extensions("\|`\r`-]([^\|`\r`-]([^").count();
2233	parser_with_extensions("\|`\r\r`=][^\|`\r\r`=][^car").count();
2234	}
2235
2236	#[test]
2237	fn issue_303() {
2238	// dont crash
2239	parser_with_extensions("[^`\r\r`a]").count();
2240	parser_with_extensions("`\r\r`]Z[^`\x00\r\r`]Z[^`\x00`").count();
2241	}
2242
2243	#[test]
2244	fn issue_313() {
2245	// dont crash
2246	parser_with_extensions("]0[^`\r\r`]0[^").count();
2247	parser_with_extensions("[^`\r`> `][^`\r`> `][^`\r`> `][").count();
2248	}
2249
2250	#[test]
2251	fn issue_311() {
2252	// dont crash
2253	parser_with_extensions("`\\\u{0d}`-`\u{09}\\\u{0d}`-`\u{09}`").count();
2254	}
2255
2256	#[test]
2257	fn issue_283() {
2258	let input = std::str::from_utf8(b"`\xf0\x9b\xb2\x9f`<td:^`\xf0\x9b\xb2\x9f`").unwrap();
2259	// dont crash
2260	parser_with_extensions(input).count();
2261	}
2262
2263	#[test]
2264	fn issue_289() {
2265	// dont crash
2266	parser_with_extensions("> - `\\\n`> - ").count();
2267	parser_with_extensions("- `\n\n`").count();
2268	}
2269
2270	#[test]
2271	fn issue_306() {
2272	// dont crash
2273	parser_with_extensions("`\r`_<__`\r`_<__`\r`_<__`\r`_<__").count();
2274	}
2275
2276	#[test]
2277	fn issue_305() {
2278	// dont crash
2279	parser_with_extensions("_6*6_*").count();
2280	}
2281
2282	#[test]
2283	fn another_emphasis_panic() {
2284	parser_with_extensions("__#_#__").count();
2285	}
2286
2287	#[test]
2288	fn offset_iter() {
2289	let event_offsets: Vec<_> = Parser::new("hello world")
2290	.into_offset_iter()
2291	.map(\|(_ev, range)\| range)
2292	.collect();
2293	let expected_offsets = vec![(`0`..`13`), (`0`..`7`), (`1`..`6`), (`0`..`7`), (`7`..`13`), (`0`..`13`)];
2294	assert_eq!(expected_offsets, event_offsets);
2295	}
2296
2297	#[test]
2298	fn reference_link_offsets() {
2299	let range =
2300	Parser::new("# H1`\n`[testing][Some reference]`\n\n`[Some reference]: https://github.com")
2301	.into_offset_iter()
2302	.filter_map(\|(ev, range)\| match ev {
2303	Event::Start(
2304	Tag::Link {
2305	link_type: LinkType::Reference,
2306	..
2307	},
2308	..,
2309	) => Some(range),
2310	_ => None,
2311	})
2312	.next()
2313	.unwrap();
2314	assert_eq!(`5`..`30`, range);
2315	}
2316
2317	#[test]
2318	fn footnote_offsets() {
2319	let range = parser_with_extensions("Testing this[^1] out.`\n\n`[^1]: Footnote.")
2320	.into_offset_iter()
2321	.filter_map(\|(ev, range)\| match ev {
2322	Event::FootnoteReference(..) => Some(range),
2323	_ => None,
2324	})
2325	.next()
2326	.unwrap();
2327	assert_eq!(`12`..`16`, range);
2328	}
2329
2330	#[test]
2331	fn footnote_offsets_exclamation() {
2332	let mut immediately_before_footnote = None;
2333	let range = parser_with_extensions("Testing this![^1] out.`\n\n`[^1]: Footnote.")
2334	.into_offset_iter()
2335	.filter_map(\|(ev, range)\| match ev {
2336	Event::FootnoteReference(..) => Some(range),
2337	_ => {
2338	immediately_before_footnote = Some((ev, range));
2339	None
2340	}
2341	})
2342	.next()
2343	.unwrap();
2344	assert_eq!(`13`..`17`, range);
2345	if let (Event::Text(exclamation), range_exclamation) =
2346	immediately_before_footnote.as_ref().unwrap()
2347	{
2348	assert_eq!("!", &exclamation[..]);
2349	assert_eq!(&(`12`..`13`), range_exclamation);
2350	} else {
2351	panic!("what came first, then? {immediately_before_footnote:?}");
2352	}
2353	}
2354
2355	#[test]
2356	fn table_offset() {
2357	let markdown = "a`\n\n`Testing\|This\|Outtt`\n`--\|:--:\|--:`\n`Some Data\|Other data\|asdf";
2358	let event_offset = parser_with_extensions(markdown)
2359	.into_offset_iter()
2360	.map(\|(_ev, range)\| range)
2361	.nth(`3`)
2362	.unwrap();
2363	let expected_offset = `3`..`59`;
2364	assert_eq!(expected_offset, event_offset);
2365	}
2366
2367	#[test]
2368	fn table_cell_span() {
2369	let markdown = "a\|b\|c`\n`--\|--\|--`\n`a\| \|c";
2370	let event_offset = parser_with_extensions(markdown)
2371	.into_offset_iter()
2372	.filter_map(\|(ev, span)\| match ev {
2373	Event::Start(Tag::TableCell) => Some(span),
2374	_ => None,
2375	})
2376	.nth(`4`)
2377	.unwrap();
2378	let expected_offset_start = "a\|b\|c`\n`--\|--\|--`\n`a\|".len();
2379	assert_eq!(
2380	expected_offset_start..(expected_offset_start + `2`),
2381	event_offset
2382	);
2383	}
2384
2385	#[test]
2386	fn offset_iter_issue_378() {
2387	let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2388	.into_offset_iter()
2389	.map(\|(_ev, range)\| range)
2390	.collect();
2391	let expected_offsets = vec![(`0`..`10`), (`0`..`2`), (`2`..`8`), (`3`..`4`), (`2`..`8`), (`8`..`10`), (`0`..`10`)];
2392	assert_eq!(expected_offsets, event_offsets);
2393	}
2394
2395	#[test]
2396	fn offset_iter_issue_404() {
2397	let event_offsets: Vec<_> = Parser::new("###`\n`")
2398	.into_offset_iter()
2399	.map(\|(_ev, range)\| range)
2400	.collect();
2401	let expected_offsets = vec![(`0`..`4`), (`0`..`4`)];
2402	assert_eq!(expected_offsets, event_offsets);
2403	}
2404
2405	// FIXME: add this one regression suite
2406	#[cfg(feature = "html")]
2407	#[test]
2408	fn link_def_at_eof() {
2409	let test_str = "[My site][world]`\n\n`[world]: https://vincentprouillet.com";
2410	let expected = "<p><a href=`\"`https://vincentprouillet.com`\"`>My site</a></p>`\n`";
2411
2412	let mut buf = String::new();
2413	crate::html::push_html(&mut buf, Parser::new(test_str));
2414	assert_eq!(expected, buf);
2415	}
2416
2417	#[cfg(feature = "html")]
2418	#[test]
2419	fn no_footnote_refs_without_option() {
2420	let test_str = "a [^a]`\n\n`[^a]: yolo";
2421	let expected = "<p>a <a href=`\"`yolo`\"`>^a</a></p>`\n`";
2422
2423	let mut buf = String::new();
2424	crate::html::push_html(&mut buf, Parser::new(test_str));
2425	assert_eq!(expected, buf);
2426	}
2427
2428	#[cfg(feature = "html")]
2429	#[test]
2430	fn ref_def_at_eof() {
2431	let test_str = "[test]:`\\`";
2432	let expected = "";
2433
2434	let mut buf = String::new();
2435	crate::html::push_html(&mut buf, Parser::new(test_str));
2436	assert_eq!(expected, buf);
2437	}
2438
2439	#[cfg(feature = "html")]
2440	#[test]
2441	fn ref_def_cr_lf() {
2442	let test_str = "[a]: /u`\r\n\n`[a]";
2443	let expected = "<p><a href=`\"`/u`\"`>a</a></p>`\n`";
2444
2445	let mut buf = String::new();
2446	crate::html::push_html(&mut buf, Parser::new(test_str));
2447	assert_eq!(expected, buf);
2448	}
2449
2450	#[cfg(feature = "html")]
2451	#[test]
2452	fn no_dest_refdef() {
2453	let test_str = "[a]:";
2454	let expected = "<p>[a]:</p>`\n`";
2455
2456	let mut buf = String::new();
2457	crate::html::push_html(&mut buf, Parser::new(test_str));
2458	assert_eq!(expected, buf);
2459	}
2460
2461	#[test]
2462	fn broken_links_called_only_once() {
2463	for &(markdown, expected) in &[
2464	("See also [`g()`][crate::g].", `1`),
2465	("See also [`g()`][crate::g][].", `1`),
2466	("[brokenlink1] some other node [brokenlink2]", `2`),
2467	] {
2468	let mut times_called = `0`;
2469	let callback = &mut \|_broken_link: BrokenLink\| {
2470	times_called += `1`;
2471	None
2472	};
2473	let parser =
2474	Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
2475	for _ in parser {}
2476	assert_eq!(times_called, expected);
2477	}
2478	}
2479
2480	#[test]
2481	fn simple_broken_link_callback() {
2482	let test_str = "This is a link w/o def: [hello][world]";
2483	let mut callback = \|broken_link: BrokenLink\| {
2484	assert_eq!("world", broken_link.reference.as_ref());
2485	assert_eq!(&test_str[broken_link.span], "[hello][world]");
2486	let url = "YOLO".into();
2487	let title = "SWAG".to_owned().into();
2488	Some((url, title))
2489	};
2490	let parser =
2491	Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
2492	let mut link_tag_count = `0`;
2493	for (typ, url, title, id) in parser.filter_map(\|event\| match event {
2494	Event::Start(tag) => match tag {
2495	Tag::Link {
2496	link_type,
2497	dest_url,
2498	title,
2499	id,
2500	} => Some((link_type, dest_url, title, id)),
2501	_ => None,
2502	},
2503	_ => None,
2504	}) {
2505	link_tag_count += `1`;
2506	assert_eq!(typ, LinkType::ReferenceUnknown);
2507	assert_eq!(url.as_ref(), "YOLO");
2508	assert_eq!(title.as_ref(), "SWAG");
2509	assert_eq!(id.as_ref(), "world");
2510	}
2511	assert!(link_tag_count > `0`);
2512	}
2513
2514	#[test]
2515	fn code_block_kind_check_fenced() {
2516	let parser = Parser::new("hello`\n````test`\n`tadam`\n````");
2517	let mut found = `0`;
2518	for (ev, _range) in parser.into_offset_iter() {
2519	match ev {
2520	Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
2521	assert_eq!(syntax.as_ref(), "test");
2522	found += `1`;
2523	}
2524	_ => {}
2525	}
2526	}
2527	assert_eq!(found, `1`);
2528	}
2529
2530	#[test]
2531	fn code_block_kind_check_indented() {
2532	let parser = Parser::new("hello`\n\n` ```test`\n` tadam`\n`hello");
2533	let mut found = `0`;
2534	for (ev, _range) in parser.into_offset_iter() {
2535	match ev {
2536	Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
2537	found += `1`;
2538	}
2539	_ => {}
2540	}
2541	}
2542	assert_eq!(found, `1`);
2543	}
2544
2545	#[test]
2546	fn ref_defs() {
2547	let input = r###"[a B c]: http://example.com
2548	[another]: https://google.com
2549
2550	text
2551
2552	[final ONE]: http://wikipedia.org
2553	"###;
2554	let mut parser = Parser::new(input);
2555
2556	assert!(parser.reference_definitions().get("a b c").is_some());
2557	assert!(parser.reference_definitions().get("nope").is_none());
2558
2559	if let Some(_event) = parser.next() {
2560	// testing keys with shorter lifetimes than parser and its input
2561	let s = "final one".to_owned();
2562	let link_def = parser.reference_definitions().get(&s).unwrap();
2563	let span = &input[link_def.span.clone()];
2564	assert_eq!(span, "[final ONE]: http://wikipedia.org");
2565	}
2566	}
2567
2568	#[test]
2569	fn common_lifetime_patterns_allowed<'b>() {
2570	let temporary_str = String::from("xyz");
2571
2572	// NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
2573	// Hack it by attaching the lifetime to the test function instead.
2574	// TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
2575	let mut closure = \|link: BrokenLink<'b>\| Some(("#".into(), link.reference));
2576
2577	fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
2578	Some(("#".into(), link.reference))
2579	}
2580
2581	for _ in Parser::new_with_broken_link_callback(
2582	"static lifetime",
2583	Options::empty(),
2584	Some(&mut closure),
2585	) {}
2586	/ This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->*
2587	* CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
2588	* shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
2589	* fix is simple: move it out to a function that allows annotating the lifetimes.
2590	*/
2591	//for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
2592	//}
2593
2594	for _ in Parser::new_with_broken_link_callback(
2595	"static lifetime",
2596	Options::empty(),
2597	Some(&mut function),
2598	) {}
2599	for _ in Parser::new_with_broken_link_callback(
2600	&temporary_str,
2601	Options::empty(),
2602	Some(&mut function),
2603	) {}
2604	}
2605	}
2606