1 | // Copyright 2017 Google Inc. All rights reserved. |
2 | // |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | // of this software and associated documentation files (the "Software"), to deal |
5 | // in the Software without restriction, including without limitation the rights |
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | // copies of the Software, and to permit persons to whom the Software is |
8 | // furnished to do so, subject to the following conditions: |
9 | // |
10 | // The above copyright notice and this permission notice shall be included in |
11 | // all copies or substantial portions of the Software. |
12 | // |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | // THE SOFTWARE. |
20 | |
21 | //! Tree-based two pass parser. |
22 | |
23 | use std::cmp::{max, min}; |
24 | use std::collections::{HashMap, VecDeque}; |
25 | use std::iter::FusedIterator; |
26 | use std::num::NonZeroUsize; |
27 | use std::ops::{Index, Range}; |
28 | |
29 | use unicase::UniCase; |
30 | |
31 | use crate::firstpass::run_first_pass; |
32 | use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel}; |
33 | use crate::scanners::*; |
34 | use crate::strings::CowStr; |
35 | use crate::tree::{Tree, TreeIndex}; |
36 | use crate::{Alignment, CodeBlockKind, Event, HeadingLevel, LinkType, Options, Tag}; |
37 | |
38 | // Allowing arbitrary depth nested parentheses inside link destinations |
39 | // can create denial of service vulnerabilities if we're not careful. |
40 | // The simplest countermeasure is to limit their depth, which is |
41 | // explicitly allowed by the spec as long as the limit is at least 3: |
42 | // https://spec.commonmark.org/0.29/#link-destination |
43 | const LINK_MAX_NESTED_PARENS: usize = 5; |
44 | |
45 | #[derive (Debug, Default, Clone, Copy)] |
46 | pub(crate) struct Item { |
47 | pub start: usize, |
48 | pub end: usize, |
49 | pub body: ItemBody, |
50 | } |
51 | |
52 | #[derive (Debug, PartialEq, Clone, Copy)] |
53 | pub(crate) enum ItemBody { |
54 | Paragraph, |
55 | Text, |
56 | SoftBreak, |
57 | HardBreak, |
58 | |
59 | // These are possible inline items, need to be resolved in second pass. |
60 | |
61 | // repeats, can_open, can_close |
62 | MaybeEmphasis(usize, bool, bool), |
63 | // quote byte, can_open, can_close |
64 | MaybeSmartQuote(u8, bool, bool), |
65 | MaybeCode(usize, bool), // number of backticks, preceded by backslash |
66 | MaybeHtml, |
67 | MaybeLinkOpen, |
68 | // bool indicates whether or not the preceding section could be a reference |
69 | MaybeLinkClose(bool), |
70 | MaybeImage, |
71 | |
72 | // These are inline items after resolution. |
73 | Emphasis, |
74 | Strong, |
75 | Strikethrough, |
76 | Code(CowIndex), |
77 | Link(LinkIndex), |
78 | Image(LinkIndex), |
79 | FootnoteReference(CowIndex), |
80 | TaskListMarker(bool), // true for checked |
81 | |
82 | Rule, |
83 | Heading(HeadingLevel, Option<HeadingIndex>), // heading level |
84 | FencedCodeBlock(CowIndex), |
85 | IndentCodeBlock, |
86 | Html, |
87 | OwnedHtml(CowIndex), |
88 | BlockQuote, |
89 | List(bool, u8, u64), // is_tight, list character, list start index |
90 | ListItem(usize), // indent level |
91 | SynthesizeText(CowIndex), |
92 | SynthesizeChar(char), |
93 | FootnoteDefinition(CowIndex), |
94 | |
95 | // Tables |
96 | Table(AlignmentIndex), |
97 | TableHead, |
98 | TableRow, |
99 | TableCell, |
100 | |
101 | // Dummy node at the top of the tree - should not be used otherwise! |
102 | Root, |
103 | } |
104 | |
105 | impl<'a> ItemBody { |
106 | fn is_inline(&self) -> bool { |
107 | matches!( |
108 | *self, |
109 | ItemBody::MaybeEmphasis(..) |
110 | | ItemBody::MaybeSmartQuote(..) |
111 | | ItemBody::MaybeHtml |
112 | | ItemBody::MaybeCode(..) |
113 | | ItemBody::MaybeLinkOpen |
114 | | ItemBody::MaybeLinkClose(..) |
115 | | ItemBody::MaybeImage |
116 | ) |
117 | } |
118 | } |
119 | |
120 | impl<'a> Default for ItemBody { |
121 | fn default() -> Self { |
122 | ItemBody::Root |
123 | } |
124 | } |
125 | |
126 | #[derive (Debug)] |
127 | pub struct BrokenLink<'a> { |
128 | pub span: std::ops::Range<usize>, |
129 | pub link_type: LinkType, |
130 | pub reference: CowStr<'a>, |
131 | } |
132 | |
133 | /// Markdown event iterator. |
134 | pub struct Parser<'input, 'callback> { |
135 | text: &'input str, |
136 | options: Options, |
137 | tree: Tree<Item>, |
138 | allocs: Allocations<'input>, |
139 | broken_link_callback: BrokenLinkCallback<'input, 'callback>, |
140 | html_scan_guard: HtmlScanGuard, |
141 | |
142 | // used by inline passes. store them here for reuse |
143 | inline_stack: InlineStack, |
144 | link_stack: LinkStack, |
145 | } |
146 | |
147 | impl<'input, 'callback> std::fmt::Debug for Parser<'input, 'callback> { |
148 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
149 | // Only print the fileds that have public types. |
150 | f&mut DebugStruct<'_, '_>.debug_struct("Parser" ) |
151 | .field("text" , &self.text) |
152 | .field("options" , &self.options) |
153 | .field( |
154 | name:"broken_link_callback" , |
155 | &self.broken_link_callback.as_ref().map(|_| ..), |
156 | ) |
157 | .finish() |
158 | } |
159 | } |
160 | |
161 | impl<'input, 'callback> Parser<'input, 'callback> { |
162 | /// Creates a new event iterator for a markdown string without any options enabled. |
163 | pub fn new(text: &'input str) -> Self { |
164 | Parser::new_ext(text, Options::empty()) |
165 | } |
166 | |
167 | /// Creates a new event iterator for a markdown string with given options. |
168 | pub fn new_ext(text: &'input str, options: Options) -> Self { |
169 | Parser::new_with_broken_link_callback(text, options, None) |
170 | } |
171 | |
172 | /// In case the parser encounters any potential links that have a broken |
173 | /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom) |
174 | /// the provided callback will be called with the reference name, |
175 | /// and the returned pair will be used as the link name and title if it is not |
176 | /// `None`. |
177 | pub fn new_with_broken_link_callback( |
178 | text: &'input str, |
179 | options: Options, |
180 | broken_link_callback: BrokenLinkCallback<'input, 'callback>, |
181 | ) -> Self { |
182 | let (mut tree, allocs) = run_first_pass(text, options); |
183 | tree.reset(); |
184 | let inline_stack = Default::default(); |
185 | let link_stack = Default::default(); |
186 | let html_scan_guard = Default::default(); |
187 | Parser { |
188 | text, |
189 | options, |
190 | tree, |
191 | allocs, |
192 | broken_link_callback, |
193 | inline_stack, |
194 | link_stack, |
195 | html_scan_guard, |
196 | } |
197 | } |
198 | |
199 | /// Returns a reference to the internal `RefDefs` object, which provides access |
200 | /// to the internal map of reference definitions. |
201 | pub fn reference_definitions(&self) -> &RefDefs { |
202 | &self.allocs.refdefs |
203 | } |
204 | |
205 | /// Handle inline markup. |
206 | /// |
207 | /// When the parser encounters any item indicating potential inline markup, all |
208 | /// inline markup passes are run on the remainder of the chain. |
209 | /// |
210 | /// Note: there's some potential for optimization here, but that's future work. |
211 | fn handle_inline(&mut self) { |
212 | self.handle_inline_pass1(); |
213 | self.handle_emphasis(); |
214 | } |
215 | |
216 | /// Handle inline HTML, code spans, and links. |
217 | /// |
218 | /// This function handles both inline HTML and code spans, because they have |
219 | /// the same precedence. It also handles links, even though they have lower |
220 | /// precedence, because the URL of links must not be processed. |
221 | fn handle_inline_pass1(&mut self) { |
222 | let mut code_delims = CodeDelims::new(); |
223 | let mut cur = self.tree.cur(); |
224 | let mut prev = None; |
225 | |
226 | let block_end = self.tree[self.tree.peek_up().unwrap()].item.end; |
227 | let block_text = &self.text[..block_end]; |
228 | |
229 | while let Some(mut cur_ix) = cur { |
230 | match self.tree[cur_ix].item.body { |
231 | ItemBody::MaybeHtml => { |
232 | let next = self.tree[cur_ix].next; |
233 | let autolink = if let Some(next_ix) = next { |
234 | scan_autolink(block_text, self.tree[next_ix].item.start) |
235 | } else { |
236 | None |
237 | }; |
238 | |
239 | if let Some((ix, uri, link_type)) = autolink { |
240 | let node = scan_nodes_to_ix(&self.tree, next, ix); |
241 | let text_node = self.tree.create_node(Item { |
242 | start: self.tree[cur_ix].item.start + 1, |
243 | end: ix - 1, |
244 | body: ItemBody::Text, |
245 | }); |
246 | let link_ix = self.allocs.allocate_link(link_type, uri, "" .into()); |
247 | self.tree[cur_ix].item.body = ItemBody::Link(link_ix); |
248 | self.tree[cur_ix].item.end = ix; |
249 | self.tree[cur_ix].next = node; |
250 | self.tree[cur_ix].child = Some(text_node); |
251 | prev = cur; |
252 | cur = node; |
253 | if let Some(node_ix) = cur { |
254 | self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix); |
255 | } |
256 | continue; |
257 | } else { |
258 | let inline_html = next.and_then(|next_ix| { |
259 | self.scan_inline_html( |
260 | block_text.as_bytes(), |
261 | self.tree[next_ix].item.start, |
262 | ) |
263 | }); |
264 | if let Some((span, ix)) = inline_html { |
265 | let node = scan_nodes_to_ix(&self.tree, next, ix); |
266 | self.tree[cur_ix].item.body = if !span.is_empty() { |
267 | let converted_string = |
268 | String::from_utf8(span).expect("invalid utf8" ); |
269 | ItemBody::OwnedHtml( |
270 | self.allocs.allocate_cow(converted_string.into()), |
271 | ) |
272 | } else { |
273 | ItemBody::Html |
274 | }; |
275 | self.tree[cur_ix].item.end = ix; |
276 | self.tree[cur_ix].next = node; |
277 | prev = cur; |
278 | cur = node; |
279 | if let Some(node_ix) = cur { |
280 | self.tree[node_ix].item.start = |
281 | max(self.tree[node_ix].item.start, ix); |
282 | } |
283 | continue; |
284 | } |
285 | } |
286 | self.tree[cur_ix].item.body = ItemBody::Text; |
287 | } |
288 | ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => { |
289 | if preceded_by_backslash { |
290 | search_count -= 1; |
291 | if search_count == 0 { |
292 | self.tree[cur_ix].item.body = ItemBody::Text; |
293 | prev = cur; |
294 | cur = self.tree[cur_ix].next; |
295 | continue; |
296 | } |
297 | } |
298 | |
299 | if code_delims.is_populated() { |
300 | // we have previously scanned all codeblock delimiters, |
301 | // so we can reuse that work |
302 | if let Some(scan_ix) = code_delims.find(cur_ix, search_count) { |
303 | self.make_code_span(cur_ix, scan_ix, preceded_by_backslash); |
304 | } else { |
305 | self.tree[cur_ix].item.body = ItemBody::Text; |
306 | } |
307 | } else { |
308 | // we haven't previously scanned all codeblock delimiters, |
309 | // so walk the AST |
310 | let mut scan = if search_count > 0 { |
311 | self.tree[cur_ix].next |
312 | } else { |
313 | None |
314 | }; |
315 | while let Some(scan_ix) = scan { |
316 | if let ItemBody::MaybeCode(delim_count, _) = |
317 | self.tree[scan_ix].item.body |
318 | { |
319 | if search_count == delim_count { |
320 | self.make_code_span(cur_ix, scan_ix, preceded_by_backslash); |
321 | code_delims.clear(); |
322 | break; |
323 | } else { |
324 | code_delims.insert(delim_count, scan_ix); |
325 | } |
326 | } |
327 | scan = self.tree[scan_ix].next; |
328 | } |
329 | if scan == None { |
330 | self.tree[cur_ix].item.body = ItemBody::Text; |
331 | } |
332 | } |
333 | } |
334 | ItemBody::MaybeLinkOpen => { |
335 | self.tree[cur_ix].item.body = ItemBody::Text; |
336 | self.link_stack.push(LinkStackEl { |
337 | node: cur_ix, |
338 | ty: LinkStackTy::Link, |
339 | }); |
340 | } |
341 | ItemBody::MaybeImage => { |
342 | self.tree[cur_ix].item.body = ItemBody::Text; |
343 | self.link_stack.push(LinkStackEl { |
344 | node: cur_ix, |
345 | ty: LinkStackTy::Image, |
346 | }); |
347 | } |
348 | ItemBody::MaybeLinkClose(could_be_ref) => { |
349 | self.tree[cur_ix].item.body = ItemBody::Text; |
350 | if let Some(tos) = self.link_stack.pop() { |
351 | if tos.ty == LinkStackTy::Disabled { |
352 | continue; |
353 | } |
354 | let next = self.tree[cur_ix].next; |
355 | if let Some((next_ix, url, title)) = |
356 | self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next) |
357 | { |
358 | let next_node = scan_nodes_to_ix(&self.tree, next, next_ix); |
359 | if let Some(prev_ix) = prev { |
360 | self.tree[prev_ix].next = None; |
361 | } |
362 | cur = Some(tos.node); |
363 | cur_ix = tos.node; |
364 | let link_ix = self.allocs.allocate_link(LinkType::Inline, url, title); |
365 | self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image { |
366 | ItemBody::Image(link_ix) |
367 | } else { |
368 | ItemBody::Link(link_ix) |
369 | }; |
370 | self.tree[cur_ix].child = self.tree[cur_ix].next; |
371 | self.tree[cur_ix].next = next_node; |
372 | self.tree[cur_ix].item.end = next_ix; |
373 | if let Some(next_node_ix) = next_node { |
374 | self.tree[next_node_ix].item.start = |
375 | max(self.tree[next_node_ix].item.start, next_ix); |
376 | } |
377 | |
378 | if tos.ty == LinkStackTy::Link { |
379 | self.link_stack.disable_all_links(); |
380 | } |
381 | } else { |
382 | // ok, so its not an inline link. maybe it is a reference |
383 | // to a defined link? |
384 | let scan_result = scan_reference( |
385 | &self.tree, |
386 | block_text, |
387 | next, |
388 | self.options.contains(Options::ENABLE_FOOTNOTES), |
389 | ); |
390 | let (node_after_link, link_type) = match scan_result { |
391 | // [label][reference] |
392 | RefScan::LinkLabel(_, end_ix) => { |
393 | // Toggle reference viability of the last closing bracket, |
394 | // so that we can skip it on future iterations in case |
395 | // it fails in this one. In particular, we won't call |
396 | // the broken link callback twice on one reference. |
397 | let reference_close_node = if let Some(node) = |
398 | scan_nodes_to_ix(&self.tree, next, end_ix - 1) |
399 | { |
400 | node |
401 | } else { |
402 | continue; |
403 | }; |
404 | self.tree[reference_close_node].item.body = |
405 | ItemBody::MaybeLinkClose(false); |
406 | let next_node = self.tree[reference_close_node].next; |
407 | |
408 | (next_node, LinkType::Reference) |
409 | } |
410 | // [reference][] |
411 | RefScan::Collapsed(next_node) => { |
412 | // This reference has already been tried, and it's not |
413 | // valid. Skip it. |
414 | if !could_be_ref { |
415 | continue; |
416 | } |
417 | (next_node, LinkType::Collapsed) |
418 | } |
419 | // [shortcut] |
420 | // |
421 | // [shortcut]: /blah |
422 | RefScan::Failed => { |
423 | if !could_be_ref { |
424 | continue; |
425 | } |
426 | (next, LinkType::Shortcut) |
427 | } |
428 | }; |
429 | |
430 | // FIXME: references and labels are mixed in the naming of variables |
431 | // below. Disambiguate! |
432 | |
433 | // (label, source_ix end) |
434 | let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result { |
435 | RefScan::LinkLabel(l, end_ix) => { |
436 | Some((ReferenceLabel::Link(l), end_ix)) |
437 | } |
438 | RefScan::Collapsed(..) | RefScan::Failed => { |
439 | // No label? maybe it is a shortcut reference |
440 | let label_start = self.tree[tos.node].item.end - 1; |
441 | let label_end = self.tree[cur_ix].item.end; |
442 | scan_link_label( |
443 | &self.tree, |
444 | &self.text[label_start..label_end], |
445 | self.options.contains(Options::ENABLE_FOOTNOTES), |
446 | ) |
447 | .map(|(ix, label)| (label, label_start + ix)) |
448 | .filter(|(_, end)| *end == label_end) |
449 | } |
450 | }; |
451 | |
452 | // see if it's a footnote reference |
453 | if let Some((ReferenceLabel::Footnote(l), end)) = label { |
454 | self.tree[tos.node].next = node_after_link; |
455 | self.tree[tos.node].child = None; |
456 | self.tree[tos.node].item.body = |
457 | ItemBody::FootnoteReference(self.allocs.allocate_cow(l)); |
458 | self.tree[tos.node].item.end = end; |
459 | prev = Some(tos.node); |
460 | cur = node_after_link; |
461 | self.link_stack.clear(); |
462 | continue; |
463 | } else if let Some((ReferenceLabel::Link(link_label), end)) = label { |
464 | let type_url_title = self |
465 | .allocs |
466 | .refdefs |
467 | .get(link_label.as_ref()) |
468 | .map(|matching_def| { |
469 | // found a matching definition! |
470 | let title = matching_def |
471 | .title |
472 | .as_ref() |
473 | .cloned() |
474 | .unwrap_or_else(|| "" .into()); |
475 | let url = matching_def.dest.clone(); |
476 | (link_type, url, title) |
477 | }) |
478 | .or_else(|| { |
479 | match self.broken_link_callback.as_mut() { |
480 | Some(callback) => { |
481 | // Construct a BrokenLink struct, which will be passed to the callback |
482 | let broken_link = BrokenLink { |
483 | span: (self.tree[tos.node].item.start)..end, |
484 | link_type, |
485 | reference: link_label, |
486 | }; |
487 | |
488 | callback(broken_link).map(|(url, title)| { |
489 | (link_type.to_unknown(), url, title) |
490 | }) |
491 | } |
492 | None => None, |
493 | } |
494 | }); |
495 | |
496 | if let Some((def_link_type, url, title)) = type_url_title { |
497 | let link_ix = |
498 | self.allocs.allocate_link(def_link_type, url, title); |
499 | self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image |
500 | { |
501 | ItemBody::Image(link_ix) |
502 | } else { |
503 | ItemBody::Link(link_ix) |
504 | }; |
505 | let label_node = self.tree[tos.node].next; |
506 | |
507 | // lets do some tree surgery to add the link to the tree |
508 | // 1st: skip the label node and close node |
509 | self.tree[tos.node].next = node_after_link; |
510 | |
511 | // then, if it exists, add the label node as a child to the link node |
512 | if label_node != cur { |
513 | self.tree[tos.node].child = label_node; |
514 | |
515 | // finally: disconnect list of children |
516 | if let Some(prev_ix) = prev { |
517 | self.tree[prev_ix].next = None; |
518 | } |
519 | } |
520 | |
521 | self.tree[tos.node].item.end = end; |
522 | |
523 | // set up cur so next node will be node_after_link |
524 | cur = Some(tos.node); |
525 | cur_ix = tos.node; |
526 | |
527 | if tos.ty == LinkStackTy::Link { |
528 | self.link_stack.disable_all_links(); |
529 | } |
530 | } |
531 | } |
532 | } |
533 | } |
534 | } |
535 | _ => (), |
536 | } |
537 | prev = cur; |
538 | cur = self.tree[cur_ix].next; |
539 | } |
540 | self.link_stack.clear(); |
541 | } |
542 | |
543 | fn handle_emphasis(&mut self) { |
544 | let mut prev = None; |
545 | let mut prev_ix: TreeIndex; |
546 | let mut cur = self.tree.cur(); |
547 | |
548 | let mut single_quote_open: Option<TreeIndex> = None; |
549 | let mut double_quote_open: bool = false; |
550 | |
551 | while let Some(mut cur_ix) = cur { |
552 | match self.tree[cur_ix].item.body { |
553 | ItemBody::MaybeEmphasis(mut count, can_open, can_close) => { |
554 | let c = self.text.as_bytes()[self.tree[cur_ix].item.start]; |
555 | let both = can_open && can_close; |
556 | if can_close { |
557 | while let Some(el) = |
558 | self.inline_stack.find_match(&mut self.tree, c, count, both) |
559 | { |
560 | // have a match! |
561 | if let Some(prev_ix) = prev { |
562 | self.tree[prev_ix].next = None; |
563 | } |
564 | let match_count = min(count, el.count); |
565 | // start, end are tree node indices |
566 | let mut end = cur_ix - 1; |
567 | let mut start = el.start + el.count; |
568 | |
569 | // work from the inside out |
570 | while start > el.start + el.count - match_count { |
571 | let inc = if start > el.start + el.count - match_count + 1 { |
572 | 2 |
573 | } else { |
574 | 1 |
575 | }; |
576 | let ty = if c == b'~' { |
577 | ItemBody::Strikethrough |
578 | } else if inc == 2 { |
579 | ItemBody::Strong |
580 | } else { |
581 | ItemBody::Emphasis |
582 | }; |
583 | |
584 | let root = start - inc; |
585 | end = end + inc; |
586 | self.tree[root].item.body = ty; |
587 | self.tree[root].item.end = self.tree[end].item.end; |
588 | self.tree[root].child = Some(start); |
589 | self.tree[root].next = None; |
590 | start = root; |
591 | } |
592 | |
593 | // set next for top most emph level |
594 | prev_ix = el.start + el.count - match_count; |
595 | prev = Some(prev_ix); |
596 | cur = self.tree[cur_ix + match_count - 1].next; |
597 | self.tree[prev_ix].next = cur; |
598 | |
599 | if el.count > match_count { |
600 | self.inline_stack.push(InlineEl { |
601 | start: el.start, |
602 | count: el.count - match_count, |
603 | c: el.c, |
604 | both, |
605 | }) |
606 | } |
607 | count -= match_count; |
608 | if count > 0 { |
609 | cur_ix = cur.unwrap(); |
610 | } else { |
611 | break; |
612 | } |
613 | } |
614 | } |
615 | if count > 0 { |
616 | if can_open { |
617 | self.inline_stack.push(InlineEl { |
618 | start: cur_ix, |
619 | count, |
620 | c, |
621 | both, |
622 | }); |
623 | } else { |
624 | for i in 0..count { |
625 | self.tree[cur_ix + i].item.body = ItemBody::Text; |
626 | } |
627 | } |
628 | prev_ix = cur_ix + count - 1; |
629 | prev = Some(prev_ix); |
630 | cur = self.tree[prev_ix].next; |
631 | } |
632 | } |
633 | ItemBody::MaybeSmartQuote(c, can_open, can_close) => { |
634 | self.tree[cur_ix].item.body = match c { |
635 | b' \'' => { |
636 | if let (Some(open_ix), true) = (single_quote_open, can_close) { |
637 | self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘' ); |
638 | single_quote_open = None; |
639 | } else if can_open { |
640 | single_quote_open = Some(cur_ix); |
641 | } |
642 | ItemBody::SynthesizeChar('’' ) |
643 | } |
644 | _ /* double quote */ => { |
645 | if can_close && double_quote_open { |
646 | double_quote_open = false; |
647 | ItemBody::SynthesizeChar('”' ) |
648 | } else { |
649 | if can_open && !double_quote_open { |
650 | double_quote_open = true; |
651 | } |
652 | ItemBody::SynthesizeChar('“' ) |
653 | } |
654 | } |
655 | }; |
656 | prev = cur; |
657 | cur = self.tree[cur_ix].next; |
658 | } |
659 | _ => { |
660 | prev = cur; |
661 | cur = self.tree[cur_ix].next; |
662 | } |
663 | } |
664 | } |
665 | self.inline_stack.pop_all(&mut self.tree); |
666 | } |
667 | |
668 | /// Returns next byte index, url and title. |
669 | fn scan_inline_link( |
670 | &self, |
671 | underlying: &'input str, |
672 | mut ix: usize, |
673 | node: Option<TreeIndex>, |
674 | ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> { |
675 | if scan_ch(&underlying.as_bytes()[ix..], b'(' ) == 0 { |
676 | return None; |
677 | } |
678 | ix += 1; |
679 | ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); |
680 | |
681 | let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?; |
682 | let dest = unescape(dest); |
683 | ix += dest_length; |
684 | |
685 | ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); |
686 | |
687 | let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) { |
688 | ix += bytes_scanned; |
689 | ix += scan_while(&underlying.as_bytes()[ix..], is_ascii_whitespace); |
690 | t |
691 | } else { |
692 | "" .into() |
693 | }; |
694 | if scan_ch(&underlying.as_bytes()[ix..], b')' ) == 0 { |
695 | return None; |
696 | } |
697 | ix += 1; |
698 | |
699 | Some((ix, dest, title)) |
700 | } |
701 | |
702 | // returns (bytes scanned, title cow) |
703 | fn scan_link_title( |
704 | &self, |
705 | text: &'input str, |
706 | start_ix: usize, |
707 | node: Option<TreeIndex>, |
708 | ) -> Option<(usize, CowStr<'input>)> { |
709 | let bytes = text.as_bytes(); |
710 | let open = match bytes.get(start_ix) { |
711 | Some(b @ b' \'' ) | Some(b @ b' \"' ) | Some(b @ b'(' ) => *b, |
712 | _ => return None, |
713 | }; |
714 | let close = if open == b'(' { b')' } else { open }; |
715 | |
716 | let mut title = String::new(); |
717 | let mut mark = start_ix + 1; |
718 | let mut i = start_ix + 1; |
719 | |
720 | while i < bytes.len() { |
721 | let c = bytes[i]; |
722 | |
723 | if c == close { |
724 | let cow = if mark == 1 { |
725 | (i - start_ix + 1, text[mark..i].into()) |
726 | } else { |
727 | title.push_str(&text[mark..i]); |
728 | (i - start_ix + 1, title.into()) |
729 | }; |
730 | |
731 | return Some(cow); |
732 | } |
733 | if c == open { |
734 | return None; |
735 | } |
736 | |
737 | if c == b' \n' || c == b' \r' { |
738 | if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) { |
739 | if self.tree[node_ix].item.start > i { |
740 | title.push_str(&text[mark..i]); |
741 | title.push(' \n' ); |
742 | i = self.tree[node_ix].item.start; |
743 | mark = i; |
744 | continue; |
745 | } |
746 | } |
747 | } |
748 | if c == b'&' { |
749 | if let (n, Some(value)) = scan_entity(&bytes[i..]) { |
750 | title.push_str(&text[mark..i]); |
751 | title.push_str(&value); |
752 | i += n; |
753 | mark = i; |
754 | continue; |
755 | } |
756 | } |
757 | if c == b' \\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) { |
758 | title.push_str(&text[mark..i]); |
759 | i += 1; |
760 | mark = i; |
761 | } |
762 | |
763 | i += 1; |
764 | } |
765 | |
766 | None |
767 | } |
768 | |
769 | /// Make a code span. |
770 | /// |
771 | /// Both `open` and `close` are matching MaybeCode items. |
772 | fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) { |
773 | let first_ix = self.tree[open].next.unwrap(); |
774 | let bytes = self.text.as_bytes(); |
775 | let mut span_start = self.tree[open].item.end; |
776 | let mut span_end = self.tree[close].item.start; |
777 | let mut buf: Option<String> = None; |
778 | |
779 | // detect all-space sequences, since they are kept as-is as of commonmark 0.29 |
780 | if !bytes[span_start..span_end].iter().all(|&b| b == b' ' ) { |
781 | let opening = matches!(bytes[span_start], b' ' | b' \r' | b' \n' ); |
782 | let closing = matches!(bytes[span_end - 1], b' ' | b' \r' | b' \n' ); |
783 | let drop_enclosing_whitespace = opening && closing; |
784 | |
785 | if drop_enclosing_whitespace { |
786 | span_start += 1; |
787 | if span_start < span_end { |
788 | span_end -= 1; |
789 | } |
790 | } |
791 | |
792 | let mut ix = first_ix; |
793 | |
794 | while ix != close { |
795 | let next_ix = self.tree[ix].next.unwrap(); |
796 | if let ItemBody::HardBreak | ItemBody::SoftBreak = self.tree[ix].item.body { |
797 | if drop_enclosing_whitespace { |
798 | // check whether break should be ignored |
799 | if ix == first_ix { |
800 | ix = next_ix; |
801 | span_start = min(span_end, self.tree[ix].item.start); |
802 | continue; |
803 | } else if next_ix == close && ix > first_ix { |
804 | break; |
805 | } |
806 | } |
807 | |
808 | let end = bytes[self.tree[ix].item.start..] |
809 | .iter() |
810 | .position(|&b| b == b' \r' || b == b' \n' ) |
811 | .unwrap() |
812 | + self.tree[ix].item.start; |
813 | if let Some(ref mut buf) = buf { |
814 | buf.push_str(&self.text[self.tree[ix].item.start..end]); |
815 | buf.push(' ' ); |
816 | } else { |
817 | let mut new_buf = String::with_capacity(span_end - span_start); |
818 | new_buf.push_str(&self.text[span_start..end]); |
819 | new_buf.push(' ' ); |
820 | buf = Some(new_buf); |
821 | } |
822 | } else if let Some(ref mut buf) = buf { |
823 | let end = if next_ix == close { |
824 | span_end |
825 | } else { |
826 | self.tree[ix].item.end |
827 | }; |
828 | buf.push_str(&self.text[self.tree[ix].item.start..end]); |
829 | } |
830 | ix = next_ix; |
831 | } |
832 | } |
833 | |
834 | let cow = if let Some(buf) = buf { |
835 | buf.into() |
836 | } else { |
837 | self.text[span_start..span_end].into() |
838 | }; |
839 | if preceding_backslash { |
840 | self.tree[open].item.body = ItemBody::Text; |
841 | self.tree[open].item.end = self.tree[open].item.start + 1; |
842 | self.tree[open].next = Some(close); |
843 | self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow)); |
844 | self.tree[close].item.start = self.tree[open].item.start + 1; |
845 | } else { |
846 | self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow)); |
847 | self.tree[open].item.end = self.tree[close].item.end; |
848 | self.tree[open].next = self.tree[close].next; |
849 | } |
850 | } |
851 | |
852 | /// On success, returns a buffer containing the inline html and byte offset. |
853 | /// When no bytes were skipped, the buffer will be empty and the html can be |
854 | /// represented as a subslice of the input string. |
855 | fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> { |
856 | let c = *bytes.get(ix)?; |
857 | if c == b'!' { |
858 | Some(( |
859 | vec![], |
860 | scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?, |
861 | )) |
862 | } else if c == b'?' { |
863 | Some(( |
864 | vec![], |
865 | scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?, |
866 | )) |
867 | } else { |
868 | let (span, i) = scan_html_block_inner( |
869 | // Subtract 1 to include the < character |
870 | &bytes[(ix - 1)..], |
871 | Some(&|bytes| { |
872 | let mut line_start = LineStart::new(bytes); |
873 | let _ = scan_containers(&self.tree, &mut line_start); |
874 | line_start.bytes_scanned() |
875 | }), |
876 | )?; |
877 | Some((span, i + ix - 1)) |
878 | } |
879 | } |
880 | |
881 | /// Consumes the event iterator and produces an iterator that produces |
882 | /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding |
883 | /// range in the markdown source. |
884 | pub fn into_offset_iter(self) -> OffsetIter<'input, 'callback> { |
885 | OffsetIter { inner: self } |
886 | } |
887 | } |
888 | |
889 | /// Returns number of containers scanned. |
890 | pub(crate) fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart) -> usize { |
891 | let mut i: usize = 0; |
892 | for &node_ix: TreeIndex in tree.walk_spine() { |
893 | match tree[node_ix].item.body { |
894 | ItemBody::BlockQuote => { |
895 | // `scan_blockquote_marker` saves & restores internally |
896 | if !line_start.scan_blockquote_marker() { |
897 | break; |
898 | } |
899 | } |
900 | ItemBody::ListItem(indent: usize) => { |
901 | let save: LineStart<'_> = line_start.clone(); |
902 | if !line_start.scan_space(n_space:indent) && !line_start.is_at_eol() { |
903 | *line_start = save; |
904 | break; |
905 | } |
906 | } |
907 | _ => (), |
908 | } |
909 | i += 1; |
910 | } |
911 | i |
912 | } |
913 | |
914 | impl<'a> Tree<Item> { |
915 | pub(crate) fn append_text(&mut self, start: usize, end: usize) { |
916 | if end > start { |
917 | if let Some(ix: TreeIndex) = self.cur() { |
918 | if ItemBody::Text == self[ix].item.body && self[ix].item.end == start { |
919 | self[ix].item.end = end; |
920 | return; |
921 | } |
922 | } |
923 | self.append(Item { |
924 | start, |
925 | end, |
926 | body: ItemBody::Text, |
927 | }); |
928 | } |
929 | } |
930 | } |
931 | |
932 | #[derive (Copy, Clone, Debug)] |
933 | struct InlineEl { |
934 | start: TreeIndex, // offset of tree node |
935 | count: usize, |
936 | c: u8, // b'*' or b'_' |
937 | both: bool, // can both open and close |
938 | } |
939 | |
940 | #[derive (Debug, Clone, Default)] |
941 | struct InlineStack { |
942 | stack: Vec<InlineEl>, |
943 | // Lower bounds for matching indices in the stack. For example |
944 | // a strikethrough delimiter will never match with any element |
945 | // in the stack with index smaller than |
946 | // `lower_bounds[InlineStack::TILDES]`. |
947 | lower_bounds: [usize; 7], |
948 | } |
949 | |
950 | impl InlineStack { |
951 | /// These are indices into the lower bounds array. |
952 | /// Not both refers to the property that the delimiter can not both |
953 | /// be opener as a closer. |
954 | const UNDERSCORE_NOT_BOTH: usize = 0; |
955 | const ASTERISK_NOT_BOTH: usize = 1; |
956 | const ASTERISK_BASE: usize = 2; |
957 | const TILDES: usize = 5; |
958 | const UNDERSCORE_BOTH: usize = 6; |
959 | |
960 | fn pop_all(&mut self, tree: &mut Tree<Item>) { |
961 | for el in self.stack.drain(..) { |
962 | for i in 0..el.count { |
963 | tree[el.start + i].item.body = ItemBody::Text; |
964 | } |
965 | } |
966 | self.lower_bounds = [0; 7]; |
967 | } |
968 | |
969 | fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize { |
970 | if c == b'_' { |
971 | if both { |
972 | self.lower_bounds[InlineStack::UNDERSCORE_BOTH] |
973 | } else { |
974 | self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] |
975 | } |
976 | } else if c == b'*' { |
977 | let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3]; |
978 | if both { |
979 | mod3_lower |
980 | } else { |
981 | min( |
982 | mod3_lower, |
983 | self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH], |
984 | ) |
985 | } |
986 | } else { |
987 | self.lower_bounds[InlineStack::TILDES] |
988 | } |
989 | } |
990 | |
991 | fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) { |
992 | if c == b'_' { |
993 | if both { |
994 | self.lower_bounds[InlineStack::UNDERSCORE_BOTH] = new_bound; |
995 | } else { |
996 | self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound; |
997 | } |
998 | } else if c == b'*' { |
999 | self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound; |
1000 | if !both { |
1001 | self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound; |
1002 | } |
1003 | } else { |
1004 | self.lower_bounds[InlineStack::TILDES] = new_bound; |
1005 | } |
1006 | } |
1007 | |
1008 | fn find_match( |
1009 | &mut self, |
1010 | tree: &mut Tree<Item>, |
1011 | c: u8, |
1012 | count: usize, |
1013 | both: bool, |
1014 | ) -> Option<InlineEl> { |
1015 | let lowerbound = min(self.stack.len(), self.get_lowerbound(c, count, both)); |
1016 | let res = self.stack[lowerbound..] |
1017 | .iter() |
1018 | .cloned() |
1019 | .enumerate() |
1020 | .rfind(|(_, el)| { |
1021 | el.c == c && (!both && !el.both || (count + el.count) % 3 != 0 || count % 3 == 0) |
1022 | }); |
1023 | |
1024 | if let Some((matching_ix, matching_el)) = res { |
1025 | let matching_ix = matching_ix + lowerbound; |
1026 | for el in &self.stack[(matching_ix + 1)..] { |
1027 | for i in 0..el.count { |
1028 | tree[el.start + i].item.body = ItemBody::Text; |
1029 | } |
1030 | } |
1031 | self.stack.truncate(matching_ix); |
1032 | Some(matching_el) |
1033 | } else { |
1034 | self.set_lowerbound(c, count, both, self.stack.len()); |
1035 | None |
1036 | } |
1037 | } |
1038 | |
1039 | fn push(&mut self, el: InlineEl) { |
1040 | self.stack.push(el) |
1041 | } |
1042 | } |
1043 | |
1044 | #[derive (Debug, Clone)] |
1045 | enum RefScan<'a> { |
1046 | // label, source ix of label end |
1047 | LinkLabel(CowStr<'a>, usize), |
1048 | // contains next node index |
1049 | Collapsed(Option<TreeIndex>), |
1050 | Failed, |
1051 | } |
1052 | |
1053 | /// Skips forward within a block to a node which spans (ends inclusive) the given |
1054 | /// index into the source. |
1055 | fn scan_nodes_to_ix( |
1056 | tree: &Tree<Item>, |
1057 | mut node: Option<TreeIndex>, |
1058 | ix: usize, |
1059 | ) -> Option<TreeIndex> { |
1060 | while let Some(node_ix: TreeIndex) = node { |
1061 | if tree[node_ix].item.end <= ix { |
1062 | node = tree[node_ix].next; |
1063 | } else { |
1064 | break; |
1065 | } |
1066 | } |
1067 | node |
1068 | } |
1069 | |
1070 | /// Scans an inline link label, which cannot be interrupted. |
1071 | /// Returns number of bytes (including brackets) and label on success. |
1072 | fn scan_link_label<'text, 'tree>( |
1073 | tree: &'tree Tree<Item>, |
1074 | text: &'text str, |
1075 | allow_footnote_refs: bool, |
1076 | ) -> Option<(usize, ReferenceLabel<'text>)> { |
1077 | let bytes: &&[u8] = &text.as_bytes(); |
1078 | if bytes.len() < 2 || bytes[0] != b'[' { |
1079 | return None; |
1080 | } |
1081 | let linebreak_handler: impl Fn(&[u8]) -> Option<…> = |bytes: &[u8]| { |
1082 | let mut line_start: LineStart<'_> = LineStart::new(bytes); |
1083 | let _ = scan_containers(tree, &mut line_start); |
1084 | Some(line_start.bytes_scanned()) |
1085 | }; |
1086 | let pair: (usize, ReferenceLabel<'_>) = if allow_footnote_refs && b'^' == bytes[1] { |
1087 | let (byte_index: usize, cow: CowStr<'_>) = scan_link_label_rest(&text[2..], &linebreak_handler)?; |
1088 | (byte_index + 2, ReferenceLabel::Footnote(cow)) |
1089 | } else { |
1090 | let (byte_index: usize, cow: CowStr<'_>) = scan_link_label_rest(&text[1..], &linebreak_handler)?; |
1091 | (byte_index + 1, ReferenceLabel::Link(cow)) |
1092 | }; |
1093 | Some(pair) |
1094 | } |
1095 | |
1096 | fn scan_reference<'a, 'b>( |
1097 | tree: &'a Tree<Item>, |
1098 | text: &'b str, |
1099 | cur: Option<TreeIndex>, |
1100 | allow_footnote_refs: bool, |
1101 | ) -> RefScan<'b> { |
1102 | let cur_ix: TreeIndex = match cur { |
1103 | None => return RefScan::Failed, |
1104 | Some(cur_ix: TreeIndex) => cur_ix, |
1105 | }; |
1106 | let start: usize = tree[cur_ix].item.start; |
1107 | let tail: &[u8] = &text.as_bytes()[start..]; |
1108 | |
1109 | if tail.starts_with(needle:b"[]" ) { |
1110 | // TODO: this unwrap is sus and should be looked at closer |
1111 | let closing_node: TreeIndex = tree[cur_ix].next.unwrap(); |
1112 | RefScan::Collapsed(tree[closing_node].next) |
1113 | } else if let Some((ix: usize, ReferenceLabel::Link(label: CowStr<'_>))) = |
1114 | scan_link_label(tree, &text[start..], allow_footnote_refs) |
1115 | { |
1116 | RefScan::LinkLabel(label, start + ix) |
1117 | } else { |
1118 | RefScan::Failed |
1119 | } |
1120 | } |
1121 | |
1122 | #[derive (Clone, Default)] |
1123 | struct LinkStack { |
1124 | inner: Vec<LinkStackEl>, |
1125 | disabled_ix: usize, |
1126 | } |
1127 | |
1128 | impl LinkStack { |
1129 | fn push(&mut self, el: LinkStackEl) { |
1130 | self.inner.push(el); |
1131 | } |
1132 | |
1133 | fn pop(&mut self) -> Option<LinkStackEl> { |
1134 | let el = self.inner.pop(); |
1135 | self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len()); |
1136 | el |
1137 | } |
1138 | |
1139 | fn clear(&mut self) { |
1140 | self.inner.clear(); |
1141 | self.disabled_ix = 0; |
1142 | } |
1143 | |
1144 | fn disable_all_links(&mut self) { |
1145 | for el in &mut self.inner[self.disabled_ix..] { |
1146 | if el.ty == LinkStackTy::Link { |
1147 | el.ty = LinkStackTy::Disabled; |
1148 | } |
1149 | } |
1150 | self.disabled_ix = self.inner.len(); |
1151 | } |
1152 | } |
1153 | |
1154 | #[derive (Clone, Debug)] |
1155 | struct LinkStackEl { |
1156 | node: TreeIndex, |
1157 | ty: LinkStackTy, |
1158 | } |
1159 | |
1160 | #[derive (PartialEq, Clone, Debug)] |
1161 | enum LinkStackTy { |
1162 | Link, |
1163 | Image, |
1164 | Disabled, |
1165 | } |
1166 | |
1167 | /// Contains the destination URL, title and source span of a reference definition. |
1168 | #[derive (Clone, Debug)] |
1169 | pub struct LinkDef<'a> { |
1170 | pub dest: CowStr<'a>, |
1171 | pub title: Option<CowStr<'a>>, |
1172 | pub span: Range<usize>, |
1173 | } |
1174 | |
1175 | /// Tracks tree indices of code span delimiters of each length. It should prevent |
1176 | /// quadratic scanning behaviours by providing (amortized) constant time lookups. |
1177 | struct CodeDelims { |
1178 | inner: HashMap<usize, VecDeque<TreeIndex>>, |
1179 | seen_first: bool, |
1180 | } |
1181 | |
1182 | impl CodeDelims { |
1183 | fn new() -> Self { |
1184 | Self { |
1185 | inner: Default::default(), |
1186 | seen_first: false, |
1187 | } |
1188 | } |
1189 | |
1190 | fn insert(&mut self, count: usize, ix: TreeIndex) { |
1191 | if self.seen_first { |
1192 | self.inner |
1193 | .entry(count) |
1194 | .or_insert_with(Default::default) |
1195 | .push_back(ix); |
1196 | } else { |
1197 | // Skip the first insert, since that delimiter will always |
1198 | // be an opener and not a closer. |
1199 | self.seen_first = true; |
1200 | } |
1201 | } |
1202 | |
1203 | fn is_populated(&self) -> bool { |
1204 | !self.inner.is_empty() |
1205 | } |
1206 | |
1207 | fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> { |
1208 | while let Some(ix) = self.inner.get_mut(&count)?.pop_front() { |
1209 | if ix > open_ix { |
1210 | return Some(ix); |
1211 | } |
1212 | } |
1213 | None |
1214 | } |
1215 | |
1216 | fn clear(&mut self) { |
1217 | self.inner.clear(); |
1218 | self.seen_first = false; |
1219 | } |
1220 | } |
1221 | |
1222 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
1223 | pub(crate) struct LinkIndex(usize); |
1224 | |
1225 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
1226 | pub(crate) struct CowIndex(usize); |
1227 | |
1228 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
1229 | pub(crate) struct AlignmentIndex(usize); |
1230 | |
1231 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
1232 | pub(crate) struct HeadingIndex(NonZeroUsize); |
1233 | |
1234 | #[derive (Clone)] |
1235 | pub(crate) struct Allocations<'a> { |
1236 | pub refdefs: RefDefs<'a>, |
1237 | links: Vec<(LinkType, CowStr<'a>, CowStr<'a>)>, |
1238 | cows: Vec<CowStr<'a>>, |
1239 | alignments: Vec<Vec<Alignment>>, |
1240 | headings: Vec<HeadingAttributes<'a>>, |
1241 | } |
1242 | |
1243 | /// Used by the heading attributes extension. |
1244 | #[derive (Clone)] |
1245 | pub(crate) struct HeadingAttributes<'a> { |
1246 | pub id: Option<&'a str>, |
1247 | pub classes: Vec<&'a str>, |
1248 | } |
1249 | |
1250 | /// Keeps track of the reference definitions defined in the document. |
1251 | #[derive (Clone, Default, Debug)] |
1252 | pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>); |
1253 | |
1254 | impl<'input, 'b, 's> RefDefs<'input> |
1255 | where |
1256 | 's: 'b, |
1257 | { |
1258 | /// Performs a lookup on reference label using unicode case folding. |
1259 | pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> { |
1260 | self.0.get(&UniCase::new(key.into())) |
1261 | } |
1262 | |
1263 | /// Provides an iterator over all the document's reference definitions. |
1264 | pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> { |
1265 | self.0.iter().map(|(k: &UniCase>, v: &LinkDef<'_>)| (k.as_ref(), v)) |
1266 | } |
1267 | } |
1268 | |
1269 | impl<'a> Allocations<'a> { |
1270 | pub fn new() -> Self { |
1271 | Self { |
1272 | refdefs: RefDefs::default(), |
1273 | links: Vec::with_capacity(128), |
1274 | cows: Vec::new(), |
1275 | alignments: Vec::new(), |
1276 | headings: Vec::new(), |
1277 | } |
1278 | } |
1279 | |
1280 | pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex { |
1281 | let ix = self.cows.len(); |
1282 | self.cows.push(cow); |
1283 | CowIndex(ix) |
1284 | } |
1285 | |
1286 | pub fn allocate_link(&mut self, ty: LinkType, url: CowStr<'a>, title: CowStr<'a>) -> LinkIndex { |
1287 | let ix = self.links.len(); |
1288 | self.links.push((ty, url, title)); |
1289 | LinkIndex(ix) |
1290 | } |
1291 | |
1292 | pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex { |
1293 | let ix = self.alignments.len(); |
1294 | self.alignments.push(alignment); |
1295 | AlignmentIndex(ix) |
1296 | } |
1297 | |
1298 | pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex { |
1299 | let ix = self.headings.len(); |
1300 | self.headings.push(attrs); |
1301 | // This won't panic. `self.headings.len()` can't be `usize::MAX` since |
1302 | // such a long Vec cannot fit in memory. |
1303 | let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings" ); |
1304 | HeadingIndex(ix_nonzero) |
1305 | } |
1306 | } |
1307 | |
1308 | impl<'a> Index<CowIndex> for Allocations<'a> { |
1309 | type Output = CowStr<'a>; |
1310 | |
1311 | fn index(&self, ix: CowIndex) -> &Self::Output { |
1312 | self.cows.index(ix.0) |
1313 | } |
1314 | } |
1315 | |
1316 | impl<'a> Index<LinkIndex> for Allocations<'a> { |
1317 | type Output = (LinkType, CowStr<'a>, CowStr<'a>); |
1318 | |
1319 | fn index(&self, ix: LinkIndex) -> &Self::Output { |
1320 | self.links.index(ix.0) |
1321 | } |
1322 | } |
1323 | |
1324 | impl<'a> Index<AlignmentIndex> for Allocations<'a> { |
1325 | type Output = Vec<Alignment>; |
1326 | |
1327 | fn index(&self, ix: AlignmentIndex) -> &Self::Output { |
1328 | self.alignments.index(ix.0) |
1329 | } |
1330 | } |
1331 | |
1332 | impl<'a> Index<HeadingIndex> for Allocations<'a> { |
1333 | type Output = HeadingAttributes<'a>; |
1334 | |
1335 | fn index(&self, ix: HeadingIndex) -> &Self::Output { |
1336 | self.headings.index(ix.0.get() - 1) |
1337 | } |
1338 | } |
1339 | |
1340 | /// A struct containing information on the reachability of certain inline HTML |
1341 | /// elements. In particular, for cdata elements (`<![CDATA[`), processing |
1342 | /// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes |
1343 | /// represent the indices before which a scan will always fail and can hence |
1344 | /// be skipped. |
1345 | #[derive (Clone, Default)] |
1346 | pub(crate) struct HtmlScanGuard { |
1347 | pub cdata: usize, |
1348 | pub processing: usize, |
1349 | pub declaration: usize, |
1350 | } |
1351 | |
1352 | pub type BrokenLinkCallback<'input, 'borrow> = |
1353 | Option<&'borrow mut dyn FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>>; |
1354 | |
1355 | /// Markdown event and source range iterator. |
1356 | /// |
1357 | /// Generates tuples where the first element is the markdown event and the second |
1358 | /// is a the corresponding range in the source string. |
1359 | /// |
1360 | /// Constructed from a `Parser` using its |
1361 | /// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method. |
1362 | #[derive (Debug)] |
1363 | pub struct OffsetIter<'a, 'b> { |
1364 | inner: Parser<'a, 'b>, |
1365 | } |
1366 | |
1367 | impl<'a, 'b> OffsetIter<'a, 'b> { |
1368 | /// Returns a reference to the internal reference definition tracker. |
1369 | pub fn reference_definitions(&self) -> &RefDefs { |
1370 | self.inner.reference_definitions() |
1371 | } |
1372 | } |
1373 | |
1374 | impl<'a, 'b> Iterator for OffsetIter<'a, 'b> { |
1375 | type Item = (Event<'a>, Range<usize>); |
1376 | |
1377 | fn next(&mut self) -> Option<Self::Item> { |
1378 | match self.inner.tree.cur() { |
1379 | None => { |
1380 | let ix = self.inner.tree.pop()?; |
1381 | let tag = item_to_tag(&self.inner.tree[ix].item, &self.inner.allocs); |
1382 | self.inner.tree.next_sibling(ix); |
1383 | let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end; |
1384 | debug_assert!(span.start <= span.end); |
1385 | Some((Event::End(tag), span)) |
1386 | } |
1387 | Some(cur_ix) => { |
1388 | if self.inner.tree[cur_ix].item.body.is_inline() { |
1389 | self.inner.handle_inline(); |
1390 | } |
1391 | |
1392 | let node = self.inner.tree[cur_ix]; |
1393 | let item = node.item; |
1394 | let event = item_to_event(item, self.inner.text, &self.inner.allocs); |
1395 | if let Event::Start(..) = event { |
1396 | self.inner.tree.push(); |
1397 | } else { |
1398 | self.inner.tree.next_sibling(cur_ix); |
1399 | } |
1400 | debug_assert!(item.start <= item.end); |
1401 | Some((event, item.start..item.end)) |
1402 | } |
1403 | } |
1404 | } |
1405 | } |
1406 | |
1407 | fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> { |
1408 | match item.body { |
1409 | ItemBody::Paragraph => Tag::Paragraph, |
1410 | ItemBody::Emphasis => Tag::Emphasis, |
1411 | ItemBody::Strong => Tag::Strong, |
1412 | ItemBody::Strikethrough => Tag::Strikethrough, |
1413 | ItemBody::Link(link_ix) => { |
1414 | let &(ref link_type, ref url, ref title) = allocs.index(link_ix); |
1415 | Tag::Link(*link_type, url.clone(), title.clone()) |
1416 | } |
1417 | ItemBody::Image(link_ix) => { |
1418 | let &(ref link_type, ref url, ref title) = allocs.index(link_ix); |
1419 | Tag::Image(*link_type, url.clone(), title.clone()) |
1420 | } |
1421 | ItemBody::Heading(level, Some(heading_ix)) => { |
1422 | let HeadingAttributes { id, classes } = allocs.index(heading_ix); |
1423 | Tag::Heading(level, *id, classes.clone()) |
1424 | } |
1425 | ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()), |
1426 | ItemBody::FencedCodeBlock(cow_ix) => { |
1427 | Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) |
1428 | } |
1429 | ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented), |
1430 | ItemBody::BlockQuote => Tag::BlockQuote, |
1431 | ItemBody::List(_, c, listitem_start) => { |
1432 | if c == b'.' || c == b')' { |
1433 | Tag::List(Some(listitem_start)) |
1434 | } else { |
1435 | Tag::List(None) |
1436 | } |
1437 | } |
1438 | ItemBody::ListItem(_) => Tag::Item, |
1439 | ItemBody::TableHead => Tag::TableHead, |
1440 | ItemBody::TableCell => Tag::TableCell, |
1441 | ItemBody::TableRow => Tag::TableRow, |
1442 | ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()), |
1443 | ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()), |
1444 | _ => panic!("unexpected item body {:?}" , item.body), |
1445 | } |
1446 | } |
1447 | |
1448 | fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Event<'a> { |
1449 | let tag = match item.body { |
1450 | ItemBody::Text => return Event::Text(text[item.start..item.end].into()), |
1451 | ItemBody::Code(cow_ix) => return Event::Code(allocs[cow_ix].clone()), |
1452 | ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs[cow_ix].clone()), |
1453 | ItemBody::SynthesizeChar(c) => return Event::Text(c.into()), |
1454 | ItemBody::Html => return Event::Html(text[item.start..item.end].into()), |
1455 | ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs[cow_ix].clone()), |
1456 | ItemBody::SoftBreak => return Event::SoftBreak, |
1457 | ItemBody::HardBreak => return Event::HardBreak, |
1458 | ItemBody::FootnoteReference(cow_ix) => { |
1459 | return Event::FootnoteReference(allocs[cow_ix].clone()) |
1460 | } |
1461 | ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked), |
1462 | ItemBody::Rule => return Event::Rule, |
1463 | |
1464 | ItemBody::Paragraph => Tag::Paragraph, |
1465 | ItemBody::Emphasis => Tag::Emphasis, |
1466 | ItemBody::Strong => Tag::Strong, |
1467 | ItemBody::Strikethrough => Tag::Strikethrough, |
1468 | ItemBody::Link(link_ix) => { |
1469 | let &(ref link_type, ref url, ref title) = allocs.index(link_ix); |
1470 | Tag::Link(*link_type, url.clone(), title.clone()) |
1471 | } |
1472 | ItemBody::Image(link_ix) => { |
1473 | let &(ref link_type, ref url, ref title) = allocs.index(link_ix); |
1474 | Tag::Image(*link_type, url.clone(), title.clone()) |
1475 | } |
1476 | ItemBody::Heading(level, Some(heading_ix)) => { |
1477 | let HeadingAttributes { id, classes } = allocs.index(heading_ix); |
1478 | Tag::Heading(level, *id, classes.clone()) |
1479 | } |
1480 | ItemBody::Heading(level, None) => Tag::Heading(level, None, Vec::new()), |
1481 | ItemBody::FencedCodeBlock(cow_ix) => { |
1482 | Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) |
1483 | } |
1484 | ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented), |
1485 | ItemBody::BlockQuote => Tag::BlockQuote, |
1486 | ItemBody::List(_, c, listitem_start) => { |
1487 | if c == b'.' || c == b')' { |
1488 | Tag::List(Some(listitem_start)) |
1489 | } else { |
1490 | Tag::List(None) |
1491 | } |
1492 | } |
1493 | ItemBody::ListItem(_) => Tag::Item, |
1494 | ItemBody::TableHead => Tag::TableHead, |
1495 | ItemBody::TableCell => Tag::TableCell, |
1496 | ItemBody::TableRow => Tag::TableRow, |
1497 | ItemBody::Table(alignment_ix) => Tag::Table(allocs[alignment_ix].clone()), |
1498 | ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs[cow_ix].clone()), |
1499 | _ => panic!("unexpected item body {:?}" , item.body), |
1500 | }; |
1501 | |
1502 | Event::Start(tag) |
1503 | } |
1504 | |
1505 | impl<'a, 'b> Iterator for Parser<'a, 'b> { |
1506 | type Item = Event<'a>; |
1507 | |
1508 | fn next(&mut self) -> Option<Event<'a>> { |
1509 | match self.tree.cur() { |
1510 | None => { |
1511 | let ix = self.tree.pop()?; |
1512 | let tag = item_to_tag(&self.tree[ix].item, &self.allocs); |
1513 | self.tree.next_sibling(ix); |
1514 | Some(Event::End(tag)) |
1515 | } |
1516 | Some(cur_ix) => { |
1517 | if self.tree[cur_ix].item.body.is_inline() { |
1518 | self.handle_inline(); |
1519 | } |
1520 | |
1521 | let node = self.tree[cur_ix]; |
1522 | let item = node.item; |
1523 | let event = item_to_event(item, self.text, &self.allocs); |
1524 | if let Event::Start(..) = event { |
1525 | self.tree.push(); |
1526 | } else { |
1527 | self.tree.next_sibling(cur_ix); |
1528 | } |
1529 | Some(event) |
1530 | } |
1531 | } |
1532 | } |
1533 | } |
1534 | |
1535 | impl FusedIterator for Parser<'_, '_> {} |
1536 | |
1537 | #[cfg (test)] |
1538 | mod test { |
1539 | use super::*; |
1540 | use crate::tree::Node; |
1541 | |
1542 | // TODO: move these tests to tests/html.rs? |
1543 | |
1544 | fn parser_with_extensions(text: &str) -> Parser<'_, 'static> { |
1545 | let mut opts = Options::empty(); |
1546 | opts.insert(Options::ENABLE_TABLES); |
1547 | opts.insert(Options::ENABLE_FOOTNOTES); |
1548 | opts.insert(Options::ENABLE_STRIKETHROUGH); |
1549 | opts.insert(Options::ENABLE_TASKLISTS); |
1550 | |
1551 | Parser::new_ext(text, opts) |
1552 | } |
1553 | |
1554 | #[test ] |
1555 | #[cfg (target_pointer_width = "64" )] |
1556 | fn node_size() { |
1557 | let node_size = std::mem::size_of::<Node<Item>>(); |
1558 | assert_eq!(48, node_size); |
1559 | } |
1560 | |
1561 | #[test ] |
1562 | #[cfg (target_pointer_width = "64" )] |
1563 | fn body_size() { |
1564 | let body_size = std::mem::size_of::<ItemBody>(); |
1565 | assert_eq!(16, body_size); |
1566 | } |
1567 | |
1568 | #[test ] |
1569 | fn single_open_fish_bracket() { |
1570 | // dont crash |
1571 | assert_eq!(3, Parser::new("<" ).count()); |
1572 | } |
1573 | |
1574 | #[test ] |
1575 | fn lone_hashtag() { |
1576 | // dont crash |
1577 | assert_eq!(2, Parser::new("#" ).count()); |
1578 | } |
1579 | |
1580 | #[test ] |
1581 | fn lots_of_backslashes() { |
1582 | // dont crash |
1583 | Parser::new(" \\\\\r\r" ).count(); |
1584 | Parser::new(" \\\r\r\\. \\\\\r\r\\. \\" ).count(); |
1585 | } |
1586 | |
1587 | #[test ] |
1588 | fn issue_320() { |
1589 | // dont crash |
1590 | parser_with_extensions(": \r\t> | \r: \r\t> | \r" ).count(); |
1591 | } |
1592 | |
1593 | #[test ] |
1594 | fn issue_319() { |
1595 | // dont crash |
1596 | parser_with_extensions("| \r-]([^| \r-]([^" ).count(); |
1597 | parser_with_extensions("| \r\r=][^| \r\r=][^car" ).count(); |
1598 | } |
1599 | |
1600 | #[test ] |
1601 | fn issue_303() { |
1602 | // dont crash |
1603 | parser_with_extensions("[^ \r\ra]" ).count(); |
1604 | parser_with_extensions(" \r\r]Z[^ \x00\r\r]Z[^ \x00" ).count(); |
1605 | } |
1606 | |
1607 | #[test ] |
1608 | fn issue_313() { |
1609 | // dont crash |
1610 | parser_with_extensions("*]0[^ \r\r*]0[^" ).count(); |
1611 | parser_with_extensions("[^ \r> `][^ \r> `][^ \r> `][" ).count(); |
1612 | } |
1613 | |
1614 | #[test ] |
1615 | fn issue_311() { |
1616 | // dont crash |
1617 | parser_with_extensions(" \\\u{0d}- \u{09}\\\u{0d}- \u{09}" ).count(); |
1618 | } |
1619 | |
1620 | #[test ] |
1621 | fn issue_283() { |
1622 | let input = std::str::from_utf8(b" \xf0\x9b\xb2\x9f<td:^ \xf0\x9b\xb2\x9f" ).unwrap(); |
1623 | // dont crash |
1624 | parser_with_extensions(input).count(); |
1625 | } |
1626 | |
1627 | #[test ] |
1628 | fn issue_289() { |
1629 | // dont crash |
1630 | parser_with_extensions("> - \\\n> - " ).count(); |
1631 | parser_with_extensions("- \n\n" ).count(); |
1632 | } |
1633 | |
1634 | #[test ] |
1635 | fn issue_306() { |
1636 | // dont crash |
1637 | parser_with_extensions("* \r_<__* \r_<__* \r_<__* \r_<__" ).count(); |
1638 | } |
1639 | |
1640 | #[test ] |
1641 | fn issue_305() { |
1642 | // dont crash |
1643 | parser_with_extensions("_6**6*_*" ).count(); |
1644 | } |
1645 | |
1646 | #[test ] |
1647 | fn another_emphasis_panic() { |
1648 | parser_with_extensions("*__#_#__*" ).count(); |
1649 | } |
1650 | |
1651 | #[test ] |
1652 | fn offset_iter() { |
1653 | let event_offsets: Vec<_> = Parser::new("*hello* world" ) |
1654 | .into_offset_iter() |
1655 | .map(|(_ev, range)| range) |
1656 | .collect(); |
1657 | let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)]; |
1658 | assert_eq!(expected_offsets, event_offsets); |
1659 | } |
1660 | |
1661 | #[test ] |
1662 | fn reference_link_offsets() { |
1663 | let range = |
1664 | Parser::new("# H1 \n[testing][Some reference] \n\n[Some reference]: https://github.com" ) |
1665 | .into_offset_iter() |
1666 | .filter_map(|(ev, range)| match ev { |
1667 | Event::Start(Tag::Link(LinkType::Reference, ..), ..) => Some(range), |
1668 | _ => None, |
1669 | }) |
1670 | .next() |
1671 | .unwrap(); |
1672 | assert_eq!(5..30, range); |
1673 | } |
1674 | |
1675 | #[test ] |
1676 | fn footnote_offsets() { |
1677 | let range = parser_with_extensions("Testing this[^1] out. \n\n[^1]: Footnote." ) |
1678 | .into_offset_iter() |
1679 | .filter_map(|(ev, range)| match ev { |
1680 | Event::FootnoteReference(..) => Some(range), |
1681 | _ => None, |
1682 | }) |
1683 | .next() |
1684 | .unwrap(); |
1685 | assert_eq!(12..16, range); |
1686 | } |
1687 | |
1688 | #[test ] |
1689 | fn table_offset() { |
1690 | let markdown = "a \n\nTesting|This|Outtt \n--|:--:|--: \nSome Data|Other data|asdf" ; |
1691 | let event_offset = parser_with_extensions(markdown) |
1692 | .into_offset_iter() |
1693 | .map(|(_ev, range)| range) |
1694 | .nth(3) |
1695 | .unwrap(); |
1696 | let expected_offset = 3..59; |
1697 | assert_eq!(expected_offset, event_offset); |
1698 | } |
1699 | |
1700 | #[test ] |
1701 | fn table_cell_span() { |
1702 | let markdown = "a|b|c \n--|--|-- \na| |c" ; |
1703 | let event_offset = parser_with_extensions(markdown) |
1704 | .into_offset_iter() |
1705 | .filter_map(|(ev, span)| match ev { |
1706 | Event::Start(Tag::TableCell) => Some(span), |
1707 | _ => None, |
1708 | }) |
1709 | .nth(4) |
1710 | .unwrap(); |
1711 | let expected_offset_start = "a|b|c \n--|--|-- \na|" .len(); |
1712 | assert_eq!( |
1713 | expected_offset_start..(expected_offset_start + 2), |
1714 | event_offset |
1715 | ); |
1716 | } |
1717 | |
1718 | #[test ] |
1719 | fn offset_iter_issue_378() { |
1720 | let event_offsets: Vec<_> = Parser::new("a [b](c) d" ) |
1721 | .into_offset_iter() |
1722 | .map(|(_ev, range)| range) |
1723 | .collect(); |
1724 | let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)]; |
1725 | assert_eq!(expected_offsets, event_offsets); |
1726 | } |
1727 | |
1728 | #[test ] |
1729 | fn offset_iter_issue_404() { |
1730 | let event_offsets: Vec<_> = Parser::new("### \n" ) |
1731 | .into_offset_iter() |
1732 | .map(|(_ev, range)| range) |
1733 | .collect(); |
1734 | let expected_offsets = vec![(0..4), (0..4)]; |
1735 | assert_eq!(expected_offsets, event_offsets); |
1736 | } |
1737 | |
1738 | // FIXME: add this one regression suite |
1739 | #[test ] |
1740 | fn link_def_at_eof() { |
1741 | let test_str = "[My site][world] \n\n[world]: https://vincentprouillet.com" ; |
1742 | let expected = "<p><a href= \"https://vincentprouillet.com \">My site</a></p> \n" ; |
1743 | |
1744 | let mut buf = String::new(); |
1745 | crate::html::push_html(&mut buf, Parser::new(test_str)); |
1746 | assert_eq!(expected, buf); |
1747 | } |
1748 | |
1749 | #[test ] |
1750 | fn no_footnote_refs_without_option() { |
1751 | let test_str = "a [^a] \n\n[^a]: yolo" ; |
1752 | let expected = "<p>a <a href= \"yolo \">^a</a></p> \n" ; |
1753 | |
1754 | let mut buf = String::new(); |
1755 | crate::html::push_html(&mut buf, Parser::new(test_str)); |
1756 | assert_eq!(expected, buf); |
1757 | } |
1758 | |
1759 | #[test ] |
1760 | fn ref_def_at_eof() { |
1761 | let test_str = "[test]: \\" ; |
1762 | let expected = "" ; |
1763 | |
1764 | let mut buf = String::new(); |
1765 | crate::html::push_html(&mut buf, Parser::new(test_str)); |
1766 | assert_eq!(expected, buf); |
1767 | } |
1768 | |
1769 | #[test ] |
1770 | fn ref_def_cr_lf() { |
1771 | let test_str = "[a]: /u \r\n\n[a]" ; |
1772 | let expected = "<p><a href= \"/u \">a</a></p> \n" ; |
1773 | |
1774 | let mut buf = String::new(); |
1775 | crate::html::push_html(&mut buf, Parser::new(test_str)); |
1776 | assert_eq!(expected, buf); |
1777 | } |
1778 | |
1779 | #[test ] |
1780 | fn no_dest_refdef() { |
1781 | let test_str = "[a]:" ; |
1782 | let expected = "<p>[a]:</p> \n" ; |
1783 | |
1784 | let mut buf = String::new(); |
1785 | crate::html::push_html(&mut buf, Parser::new(test_str)); |
1786 | assert_eq!(expected, buf); |
1787 | } |
1788 | |
1789 | #[test ] |
1790 | fn broken_links_called_only_once() { |
1791 | for &(markdown, expected) in &[ |
1792 | ("See also [`g()`][crate::g]." , 1), |
1793 | ("See also [`g()`][crate::g][]." , 1), |
1794 | ("[brokenlink1] some other node [brokenlink2]" , 2), |
1795 | ] { |
1796 | let mut times_called = 0; |
1797 | let callback = &mut |_broken_link: BrokenLink| { |
1798 | times_called += 1; |
1799 | None |
1800 | }; |
1801 | let parser = |
1802 | Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback)); |
1803 | for _ in parser {} |
1804 | assert_eq!(times_called, expected); |
1805 | } |
1806 | } |
1807 | |
1808 | #[test ] |
1809 | fn simple_broken_link_callback() { |
1810 | let test_str = "This is a link w/o def: [hello][world]" ; |
1811 | let mut callback = |broken_link: BrokenLink| { |
1812 | assert_eq!("world" , broken_link.reference.as_ref()); |
1813 | assert_eq!(&test_str[broken_link.span], "[hello][world]" ); |
1814 | let url = "YOLO" .into(); |
1815 | let title = "SWAG" .to_owned().into(); |
1816 | Some((url, title)) |
1817 | }; |
1818 | let parser = |
1819 | Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback)); |
1820 | let mut link_tag_count = 0; |
1821 | for (typ, url, title) in parser.filter_map(|event| match event { |
1822 | Event::Start(tag) | Event::End(tag) => match tag { |
1823 | Tag::Link(typ, url, title) => Some((typ, url, title)), |
1824 | _ => None, |
1825 | }, |
1826 | _ => None, |
1827 | }) { |
1828 | link_tag_count += 1; |
1829 | assert_eq!(typ, LinkType::ReferenceUnknown); |
1830 | assert_eq!(url.as_ref(), "YOLO" ); |
1831 | assert_eq!(title.as_ref(), "SWAG" ); |
1832 | } |
1833 | assert!(link_tag_count > 0); |
1834 | } |
1835 | |
1836 | #[test ] |
1837 | fn code_block_kind_check_fenced() { |
1838 | let parser = Parser::new("hello \n```test \ntadam \n```" ); |
1839 | let mut found = 0; |
1840 | for (ev, _range) in parser.into_offset_iter() { |
1841 | match ev { |
1842 | Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => { |
1843 | assert_eq!(syntax.as_ref(), "test" ); |
1844 | found += 1; |
1845 | } |
1846 | _ => {} |
1847 | } |
1848 | } |
1849 | assert_eq!(found, 1); |
1850 | } |
1851 | |
1852 | #[test ] |
1853 | fn code_block_kind_check_indented() { |
1854 | let parser = Parser::new("hello \n\n ```test \n tadam \nhello" ); |
1855 | let mut found = 0; |
1856 | for (ev, _range) in parser.into_offset_iter() { |
1857 | match ev { |
1858 | Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => { |
1859 | found += 1; |
1860 | } |
1861 | _ => {} |
1862 | } |
1863 | } |
1864 | assert_eq!(found, 1); |
1865 | } |
1866 | |
1867 | #[test ] |
1868 | fn ref_defs() { |
1869 | let input = r###"[a B c]: http://example.com |
1870 | [another]: https://google.com |
1871 | |
1872 | text |
1873 | |
1874 | [final ONE]: http://wikipedia.org |
1875 | "### ; |
1876 | let mut parser = Parser::new(input); |
1877 | |
1878 | assert!(parser.reference_definitions().get("a b c" ).is_some()); |
1879 | assert!(parser.reference_definitions().get("nope" ).is_none()); |
1880 | |
1881 | if let Some(_event) = parser.next() { |
1882 | // testing keys with shorter lifetimes than parser and its input |
1883 | let s = "final one" .to_owned(); |
1884 | let link_def = parser.reference_definitions().get(&s).unwrap(); |
1885 | let span = &input[link_def.span.clone()]; |
1886 | assert_eq!(span, "[final ONE]: http://wikipedia.org" ); |
1887 | } |
1888 | } |
1889 | |
1890 | #[test ] |
1891 | fn common_lifetime_patterns_allowed<'b>() { |
1892 | let temporary_str = String::from("xyz" ); |
1893 | |
1894 | // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself. |
1895 | // Hack it by attaching the lifetime to the test function instead. |
1896 | // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :( |
1897 | let mut closure = |link: BrokenLink<'b>| Some(("#" .into(), link.reference.into())); |
1898 | |
1899 | fn function<'a>(link: BrokenLink<'a>) -> Option<(CowStr<'a>, CowStr<'a>)> { |
1900 | Some(("#" .into(), link.reference)) |
1901 | } |
1902 | |
1903 | for _ in Parser::new_with_broken_link_callback( |
1904 | "static lifetime" , |
1905 | Options::empty(), |
1906 | Some(&mut closure), |
1907 | ) {} |
1908 | /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) -> |
1909 | * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives |
1910 | * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the |
1911 | * fix is simple: move it out to a function that allows annotating the lifetimes. |
1912 | */ |
1913 | //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) { |
1914 | //} |
1915 | |
1916 | for _ in Parser::new_with_broken_link_callback( |
1917 | "static lifetime" , |
1918 | Options::empty(), |
1919 | Some(&mut function), |
1920 | ) {} |
1921 | for _ in Parser::new_with_broken_link_callback( |
1922 | &temporary_str, |
1923 | Options::empty(), |
1924 | Some(&mut function), |
1925 | ) {} |
1926 | } |
1927 | } |
1928 | |