1//! The first pass resolves all block structure, generating an AST. Within a block, items
2//! are in a linear chain with potential inline markup identified.
3
4use std::cmp::max;
5use std::ops::Range;
6
7use crate::parse::{
8 scan_containers, Allocations, FootnoteDef, HeadingAttributes, Item, ItemBody, LinkDef,
9 LINK_MAX_NESTED_PARENS,
10};
11use crate::strings::CowStr;
12use crate::tree::{Tree, TreeIndex};
13use crate::Options;
14use crate::{
15 linklabel::{scan_link_label_rest, LinkLabel},
16 HeadingLevel,
17};
18use crate::{scanners::*, MetadataBlockKind};
19
20use unicase::UniCase;
21
22/// Runs the first pass, which resolves the block structure of the document,
23/// and returns the resulting tree.
24pub(crate) fn run_first_pass(text: &str, options: Options) -> (Tree<Item>, Allocations<'_>) {
25 // This is a very naive heuristic for the number of nodes
26 // we'll need.
27 let start_capacity: usize = max(v1:128, v2:text.len() / 32);
28 let lookup_table: &[bool; 256] = &create_lut(&options);
29 let first_pass: FirstPass<'_, '_> = FirstPass {
30 text,
31 tree: Tree::with_capacity(cap:start_capacity),
32 begin_list_item: None,
33 last_line_blank: false,
34 allocs: Allocations::new(),
35 options,
36 lookup_table,
37 next_paragraph_task: None,
38 brace_context_next: 0,
39 brace_context_stack: Vec::new(),
40 };
41 first_pass.run()
42}
43
44// Each level of brace nesting adds another entry to a hash table.
45// To limit the amount of memory consumed, do not create a new brace
46// context beyond some amount deep.
47//
48// There are actually two limits at play here: this one,
49// and the one where the maximum amount of distinct contexts passes
50// the 255 item limit imposed by using `u8`. When over 255 distinct
51// contexts are created, it wraps around, while this one instead makes it
52// saturate, which is a better behavior.
53const MATH_BRACE_CONTEXT_MAX_NESTING: usize = 25;
54
55/// State for the first parsing pass.
56struct FirstPass<'a, 'b> {
57 text: &'a str,
58 tree: Tree<Item>,
59 begin_list_item: Option<usize>,
60 last_line_blank: bool,
61 allocs: Allocations<'a>,
62 options: Options,
63 lookup_table: &'b LookupTable,
64 /// This is `Some(item)` when the next paragraph
65 /// starts with a task list marker.
66 next_paragraph_task: Option<Item>,
67 /// Math environment brace nesting.
68 brace_context_stack: Vec<u8>,
69 brace_context_next: usize,
70}
71
72impl<'a, 'b> FirstPass<'a, 'b> {
73 fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
74 let mut ix = 0;
75 while ix < self.text.len() {
76 ix = self.parse_block(ix);
77 }
78 for _ in 0..self.tree.spine_len() {
79 self.pop(ix);
80 }
81 (self.tree, self.allocs)
82 }
83
84 /// Returns offset after block.
85 fn parse_block(&mut self, mut start_ix: usize) -> usize {
86 let bytes = self.text.as_bytes();
87 let mut line_start = LineStart::new(&bytes[start_ix..]);
88
89 // math spans and their braces are tracked only within a single block
90 self.brace_context_stack.clear();
91 self.brace_context_next = 0;
92
93 let i = scan_containers(
94 &self.tree,
95 &mut line_start,
96 self.options.has_gfm_footnotes(),
97 );
98 for _ in i..self.tree.spine_len() {
99 self.pop(start_ix);
100 }
101
102 if self.options.contains(Options::ENABLE_OLD_FOOTNOTES) {
103 // finish footnote if it's still open and was preceded by blank line
104 if let Some(node_ix) = self.tree.peek_up() {
105 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
106 if self.last_line_blank {
107 self.pop(start_ix);
108 }
109 }
110 }
111
112 // Footnote definitions of the form
113 // [^bar]:
114 // * anything really
115 let container_start = start_ix + line_start.bytes_scanned();
116 if let Some(bytecount) = self.parse_footnote(container_start) {
117 start_ix = container_start + bytecount;
118 start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
119 line_start = LineStart::new(&bytes[start_ix..]);
120 }
121 }
122
123 // Process new containers
124 loop {
125 if self.options.has_gfm_footnotes()
126 || self.options.contains(Options::ENABLE_OLD_FOOTNOTES)
127 {
128 // Footnote definitions of the form
129 // [^bar]:
130 // * anything really
131 let save = line_start.clone();
132 let indent = line_start.scan_space_upto(4);
133 if indent < 4 {
134 let container_start = start_ix + line_start.bytes_scanned();
135 if let Some(bytecount) = self.parse_footnote(container_start) {
136 start_ix = container_start + bytecount;
137 line_start = LineStart::new(&bytes[start_ix..]);
138 continue;
139 } else {
140 line_start = save;
141 }
142 } else {
143 line_start = save;
144 }
145 }
146 let container_start = start_ix + line_start.bytes_scanned();
147 if let Some((ch, index, indent)) = line_start.scan_list_marker() {
148 let after_marker_index = start_ix + line_start.bytes_scanned();
149 self.continue_list(container_start, ch, index);
150 self.tree.append(Item {
151 start: container_start,
152 end: after_marker_index, // will get updated later if item not empty
153 body: ItemBody::ListItem(indent),
154 });
155 self.tree.push();
156 if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
157 self.begin_list_item = Some(after_marker_index + n);
158 return after_marker_index + n;
159 }
160 if self.options.contains(Options::ENABLE_TASKLISTS) {
161 let task_list_marker =
162 line_start.scan_task_list_marker().map(|is_checked| Item {
163 start: after_marker_index,
164 end: start_ix + line_start.bytes_scanned(),
165 body: ItemBody::TaskListMarker(is_checked),
166 });
167 if let Some(task_list_marker) = task_list_marker {
168 if let Some(n) = scan_blank_line(&bytes[task_list_marker.end..]) {
169 self.tree.append(task_list_marker);
170 self.begin_list_item = Some(task_list_marker.end + n);
171 return task_list_marker.end + n;
172 } else {
173 self.next_paragraph_task = Some(task_list_marker);
174 }
175 }
176 }
177 } else if line_start.scan_blockquote_marker() {
178 let kind = if self.options.contains(Options::ENABLE_GFM) {
179 line_start.scan_blockquote_tag()
180 } else {
181 None
182 };
183 self.finish_list(start_ix);
184 self.tree.append(Item {
185 start: container_start,
186 end: 0, // will get set later
187 body: ItemBody::BlockQuote(kind),
188 });
189 self.tree.push();
190 if kind.is_some() {
191 // blockquote tag leaves us at the end of the line
192 // we need to scan through all the container syntax for the next line
193 // and break out if we can't re-scan all of them
194 let ix = start_ix + line_start.bytes_scanned();
195 let mut lazy_line_start = LineStart::new(&bytes[ix..]);
196 let current_container = scan_containers(
197 &self.tree,
198 &mut lazy_line_start,
199 self.options.has_gfm_footnotes(),
200 ) == self.tree.spine_len();
201 if !lazy_line_start.scan_space(4)
202 && self.scan_paragraph_interrupt(
203 &bytes[ix + lazy_line_start.bytes_scanned()..],
204 current_container,
205 )
206 {
207 return ix;
208 } else {
209 // blockquote tags act as if they were nested in a paragraph
210 // so you can lazily continue the imaginary paragraph off of them
211 line_start = lazy_line_start;
212 line_start.scan_all_space();
213 start_ix = ix;
214 break;
215 }
216 }
217 } else {
218 break;
219 }
220 }
221
222 let ix = start_ix + line_start.bytes_scanned();
223
224 if let Some(n) = scan_blank_line(&bytes[ix..]) {
225 if let Some(node_ix) = self.tree.peek_up() {
226 match &mut self.tree[node_ix].item.body {
227 ItemBody::BlockQuote(..) => (),
228 ItemBody::ListItem(indent) if self.begin_list_item.is_some() => {
229 self.last_line_blank = true;
230 // This is a blank list item.
231 // While the list itself can be continued no matter how many blank lines
232 // there are between this one and the next one, it cannot nest anything
233 // else, so its indentation should not be subtracted from the line start.
234 *indent = 0;
235 }
236 _ => {
237 self.last_line_blank = true;
238 }
239 }
240 }
241 return ix + n;
242 }
243
244 self.finish_list(start_ix);
245
246 // Save `remaining_space` here to avoid needing to backtrack `line_start` for HTML blocks
247 let remaining_space = line_start.remaining_space();
248
249 let indent = line_start.scan_space_upto(4);
250 if indent == 4 {
251 let ix = start_ix + line_start.bytes_scanned();
252 let remaining_space = line_start.remaining_space();
253 return self.parse_indented_code_block(ix, remaining_space);
254 }
255
256 let ix = start_ix + line_start.bytes_scanned();
257
258 // metadata blocks cannot be indented
259 if indent == 0 {
260 if let Some((_n, metadata_block_ch)) = scan_metadata_block(
261 &bytes[ix..],
262 self.options
263 .contains(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS),
264 self.options
265 .contains(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS),
266 ) {
267 return self.parse_metadata_block(ix, metadata_block_ch);
268 }
269 }
270
271 // HTML Blocks
272 if bytes[ix] == b'<' {
273 // Types 1-5 are all detected by one function and all end with the same
274 // pattern
275 if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
276 return self.parse_html_block_type_1_to_5(
277 ix,
278 html_end_tag,
279 remaining_space,
280 indent,
281 );
282 }
283
284 // Detect type 6
285 if starts_html_block_type_6(&bytes[(ix + 1)..]) {
286 return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
287 }
288
289 // Detect type 7
290 if let Some(_html_bytes) = scan_html_type_7(&bytes[ix..]) {
291 return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
292 }
293 }
294
295 if let Ok(n) = scan_hrule(&bytes[ix..]) {
296 return self.parse_hrule(n, ix);
297 }
298
299 if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
300 return self.parse_atx_heading(ix, atx_size);
301 }
302
303 if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
304 return self.parse_fenced_code_block(ix, indent, fence_ch, n);
305 }
306
307 // parse refdef
308 while let Some((bytecount, label, link_def)) =
309 self.parse_refdef_total(start_ix + line_start.bytes_scanned())
310 {
311 self.allocs.refdefs.0.entry(label).or_insert(link_def);
312 let container_start = start_ix + line_start.bytes_scanned();
313 let mut ix = container_start + bytecount;
314 // Refdefs act as if they were contained within a paragraph, for purposes of lazy
315 // continuations. For example:
316 //
317 // ```
318 // > [foo]: http://example.com
319 // bar
320 // ```
321 //
322 // is equivalent to
323 //
324 // ```
325 // > bar
326 //
327 // [foo]: http://example.com
328 // ```
329 if let Some(nl) = scan_blank_line(&bytes[ix..]) {
330 ix += nl;
331 let mut lazy_line_start = LineStart::new(&bytes[ix..]);
332 let current_container = scan_containers(
333 &self.tree,
334 &mut lazy_line_start,
335 self.options.has_gfm_footnotes(),
336 ) == self.tree.spine_len();
337 if !lazy_line_start.scan_space(4)
338 && self.scan_paragraph_interrupt(
339 &bytes[ix + lazy_line_start.bytes_scanned()..],
340 current_container,
341 )
342 {
343 return ix;
344 } else {
345 line_start = lazy_line_start;
346 line_start.scan_all_space();
347 start_ix = ix;
348 }
349 } else {
350 return ix;
351 }
352 }
353
354 let ix = start_ix + line_start.bytes_scanned();
355
356 self.parse_paragraph(ix)
357 }
358
359 /// Returns the offset of the first line after the table.
360 /// Assumptions: current focus is a table element and the table header
361 /// matches the separator line (same number of columns).
362 fn parse_table(
363 &mut self,
364 table_cols: usize,
365 head_start: usize,
366 body_start: usize,
367 ) -> Option<usize> {
368 // filled empty cells are limited to protect against quadratic growth
369 // https://github.com/raphlinus/pulldown-cmark/issues/832
370 let mut missing_empty_cells = 0;
371 // parse header. this shouldn't fail because we made sure the table header is ok
372 let (_sep_start, thead_ix) =
373 self.parse_table_row_inner(head_start, table_cols, &mut missing_empty_cells)?;
374 self.tree[thead_ix].item.body = ItemBody::TableHead;
375
376 // parse body
377 let mut ix = body_start;
378 while let Some((next_ix, _row_ix)) =
379 self.parse_table_row(ix, table_cols, &mut missing_empty_cells)
380 {
381 ix = next_ix;
382 }
383
384 self.pop(ix);
385 Some(ix)
386 }
387
388 /// Call this when containers are taken care of.
389 /// Returns bytes scanned, row_ix
390 fn parse_table_row_inner(
391 &mut self,
392 mut ix: usize,
393 row_cells: usize,
394 missing_empty_cells: &mut usize,
395 ) -> Option<(usize, TreeIndex)> {
396 // Limit to prevent a malicious input from causing a denial of service.
397 const MAX_AUTOCOMPLETED_CELLS: usize = 1 << 18; // = 0x40000
398
399 let bytes = self.text.as_bytes();
400 let mut cells = 0;
401 let mut final_cell_ix = None;
402
403 let old_cur = self.tree.cur();
404 let row_ix = self.tree.append(Item {
405 start: ix,
406 end: 0, // set at end of this function
407 body: ItemBody::TableRow,
408 });
409 self.tree.push();
410
411 loop {
412 ix += scan_ch(&bytes[ix..], b'|');
413 let start_ix = ix;
414 ix += scan_whitespace_no_nl(&bytes[ix..]);
415
416 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
417 ix += eol_bytes;
418 break;
419 }
420
421 let cell_ix = self.tree.append(Item {
422 start: start_ix,
423 end: ix,
424 body: ItemBody::TableCell,
425 });
426 self.tree.push();
427 let (next_ix, _brk) = self.parse_line(ix, None, TableParseMode::Active);
428
429 self.tree[cell_ix].item.end = next_ix;
430 self.tree.pop();
431
432 ix = next_ix;
433 cells += 1;
434
435 if cells == row_cells {
436 final_cell_ix = Some(cell_ix);
437 }
438 }
439
440 if let (Some(cur), 0) = (old_cur, cells) {
441 self.pop(ix);
442 self.tree[cur].next = None;
443 return None;
444 }
445
446 // fill empty cells if needed
447 // note: this is where GFM and commonmark-extra diverge. we follow
448 // GFM here
449 for _ in cells..row_cells {
450 if *missing_empty_cells >= MAX_AUTOCOMPLETED_CELLS {
451 return None;
452 }
453 *missing_empty_cells += 1;
454 self.tree.append(Item {
455 start: ix,
456 end: ix,
457 body: ItemBody::TableCell,
458 });
459 }
460
461 // drop excess cells
462 if let Some(cell_ix) = final_cell_ix {
463 self.tree[cell_ix].next = None;
464 }
465
466 self.pop(ix);
467
468 Some((ix, row_ix))
469 }
470
471 /// Returns first offset after the row and the tree index of the row.
472 fn parse_table_row(
473 &mut self,
474 mut ix: usize,
475 row_cells: usize,
476 missing_empty_cells: &mut usize,
477 ) -> Option<(usize, TreeIndex)> {
478 let bytes = self.text.as_bytes();
479 let mut line_start = LineStart::new(&bytes[ix..]);
480 let current_container = scan_containers(
481 &self.tree,
482 &mut line_start,
483 self.options.has_gfm_footnotes(),
484 ) == self.tree.spine_len();
485 if !current_container {
486 return None;
487 }
488 line_start.scan_all_space();
489 ix += line_start.bytes_scanned();
490 if scan_paragraph_interrupt_no_table(
491 &bytes[ix..],
492 current_container,
493 self.options.contains(Options::ENABLE_FOOTNOTES),
494 &self.tree,
495 ) {
496 return None;
497 }
498
499 let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells, missing_empty_cells)?;
500 Some((ix, row_ix))
501 }
502
503 /// Returns offset of line start after paragraph.
504 fn parse_paragraph(&mut self, start_ix: usize) -> usize {
505 let node_ix = self.tree.append(Item {
506 start: start_ix,
507 end: 0, // will get set later
508 body: ItemBody::Paragraph,
509 });
510 self.tree.push();
511
512 if let Some(item) = self.next_paragraph_task {
513 self.tree.append(item);
514 self.next_paragraph_task = None;
515 }
516
517 let bytes = self.text.as_bytes();
518 let mut ix = start_ix;
519 loop {
520 let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
521 TableParseMode::Scan
522 } else {
523 TableParseMode::Disabled
524 };
525 let (next_ix, brk) = self.parse_line(ix, None, scan_mode);
526
527 // break out when we find a table
528 if let Some(Item {
529 body: ItemBody::Table(alignment_ix),
530 ..
531 }) = brk
532 {
533 let table_cols = self.allocs[alignment_ix].len();
534 self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
535 // this clears out any stuff we may have appended - but there may
536 // be a cleaner way
537 self.tree[node_ix].child = None;
538 self.tree.pop();
539 self.tree.push();
540 if let Some(ix) = self.parse_table(table_cols, ix, next_ix) {
541 return ix;
542 }
543 }
544
545 ix = next_ix;
546 let mut line_start = LineStart::new(&bytes[ix..]);
547 let current_container = scan_containers(
548 &self.tree,
549 &mut line_start,
550 self.options.has_gfm_footnotes(),
551 ) == self.tree.spine_len();
552 let trailing_backslash_pos = match brk {
553 Some(Item {
554 start,
555 body: ItemBody::HardBreak(true),
556 ..
557 }) if bytes[start] == b'\\' => Some(start),
558 _ => None,
559 };
560 if !line_start.scan_space(4) {
561 let ix_new = ix + line_start.bytes_scanned();
562 if current_container {
563 if let Some(ix_setext) =
564 self.parse_setext_heading(ix_new, node_ix, trailing_backslash_pos.is_some())
565 {
566 if let Some(pos) = trailing_backslash_pos {
567 self.tree.append_text(pos, pos + 1, false);
568 }
569 ix = ix_setext;
570 break;
571 }
572 }
573 // first check for non-empty lists, then for other interrupts
574 let suffix = &bytes[ix_new..];
575 if self.scan_paragraph_interrupt(suffix, current_container) {
576 if let Some(pos) = trailing_backslash_pos {
577 self.tree.append_text(pos, pos + 1, false);
578 }
579 break;
580 }
581 }
582 line_start.scan_all_space();
583 if line_start.is_at_eol() {
584 if let Some(pos) = trailing_backslash_pos {
585 self.tree.append_text(pos, pos + 1, false);
586 }
587 break;
588 }
589 ix = next_ix + line_start.bytes_scanned();
590 if let Some(item) = brk {
591 self.tree.append(item);
592 }
593 }
594
595 self.pop(ix);
596 ix
597 }
598
599 /// Returns end ix of setext_heading on success.
600 fn parse_setext_heading(
601 &mut self,
602 ix: usize,
603 node_ix: TreeIndex,
604 has_trailing_content: bool,
605 ) -> Option<usize> {
606 let bytes = self.text.as_bytes();
607 let (n, level) = scan_setext_heading(&bytes[ix..])?;
608 let mut attrs = None;
609
610 if let Some(cur_ix) = self.tree.cur() {
611 let parent_ix = self.tree.peek_up().unwrap();
612 let header_start = self.tree[parent_ix].item.start;
613 // Note that `self.tree[parent_ix].item.end` might be zero at this point.
614 // Use the end position of the current node (i.e. the last known child
615 // of the parent) instead.
616 let header_end = self.tree[cur_ix].item.end;
617
618 // extract the trailing attribute block
619 let (content_end, attrs_) =
620 self.extract_and_parse_heading_attribute_block(header_start, header_end);
621 attrs = attrs_;
622
623 // strip trailing whitespace
624 let new_end = if has_trailing_content {
625 content_end
626 } else {
627 let trailing_ws =
628 scan_rev_while(&bytes[header_start..content_end], is_ascii_whitespace_no_nl);
629 content_end - trailing_ws
630 };
631
632 if attrs.is_some() {
633 // remove trailing block attributes
634 self.tree.truncate_siblings(new_end);
635 }
636
637 if let Some(cur_ix) = self.tree.cur() {
638 self.tree[cur_ix].item.end = new_end;
639 }
640 }
641
642 self.tree[node_ix].item.body = ItemBody::Heading(
643 level,
644 attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
645 );
646
647 Some(ix + n)
648 }
649
650 /// Parse a line of input, appending text and items to tree.
651 ///
652 /// Returns: index after line and an item representing the break.
653 fn parse_line(
654 &mut self,
655 start: usize,
656 end: Option<usize>,
657 mode: TableParseMode,
658 ) -> (usize, Option<Item>) {
659 let bytes = self.text.as_bytes();
660 let bytes = match end {
661 Some(end) => &bytes[..end],
662 None => bytes,
663 };
664 let bytes_len = bytes.len();
665 let mut pipes = 0;
666 let mut last_pipe_ix = start;
667 let mut begin_text = start;
668 let mut backslash_escaped = false;
669
670 let (final_ix, brk) = iterate_special_bytes(self.lookup_table, bytes, start, |ix, byte| {
671 match byte {
672 b'\n' | b'\r' => {
673 if let TableParseMode::Active = mode {
674 return LoopInstruction::BreakAtWith(ix, None);
675 }
676
677 let mut i = ix;
678 let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
679
680 let end_ix = ix + eol_bytes;
681 let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
682 if trailing_backslashes % 2 == 1 && end_ix < bytes_len {
683 i -= 1;
684 self.tree.append_text(begin_text, i, backslash_escaped);
685 backslash_escaped = false;
686 return LoopInstruction::BreakAtWith(
687 end_ix,
688 Some(Item {
689 start: i,
690 end: end_ix,
691 body: ItemBody::HardBreak(true),
692 }),
693 );
694 }
695
696 if mode == TableParseMode::Scan && pipes > 0 {
697 // check if we may be parsing a table
698 let next_line_ix = ix + eol_bytes;
699 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
700 if scan_containers(
701 &self.tree,
702 &mut line_start,
703 self.options.has_gfm_footnotes(),
704 ) == self.tree.spine_len()
705 {
706 let table_head_ix = next_line_ix + line_start.bytes_scanned();
707 let (table_head_bytes, alignment) =
708 scan_table_head(&bytes[table_head_ix..]);
709
710 if table_head_bytes > 0 {
711 // computing header count from number of pipes
712 let header_count =
713 count_header_cols(bytes, pipes, start, last_pipe_ix);
714
715 // make sure they match the number of columns we find in separator line
716 if alignment.len() == header_count {
717 let alignment_ix = self.allocs.allocate_alignment(alignment);
718 let end_ix = table_head_ix + table_head_bytes;
719 return LoopInstruction::BreakAtWith(
720 end_ix,
721 Some(Item {
722 start: i,
723 end: end_ix, // must update later
724 body: ItemBody::Table(alignment_ix),
725 }),
726 );
727 }
728 }
729 }
730 }
731
732 let trailing_whitespace =
733 scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
734 if trailing_whitespace >= 2 {
735 i -= trailing_whitespace;
736 self.tree.append_text(begin_text, i, backslash_escaped);
737 backslash_escaped = false;
738 return LoopInstruction::BreakAtWith(
739 end_ix,
740 Some(Item {
741 start: i,
742 end: end_ix,
743 body: ItemBody::HardBreak(false),
744 }),
745 );
746 }
747
748 self.tree
749 .append_text(begin_text, ix - trailing_whitespace, backslash_escaped);
750 backslash_escaped = false;
751
752 LoopInstruction::BreakAtWith(
753 end_ix,
754 Some(Item {
755 start: i,
756 end: end_ix,
757 body: ItemBody::SoftBreak,
758 }),
759 )
760 }
761 b'\\' => {
762 if ix + 1 < bytes_len && is_ascii_punctuation(bytes[ix + 1]) {
763 self.tree.append_text(begin_text, ix, backslash_escaped);
764 if bytes[ix + 1] == b'`' {
765 let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
766 self.tree.append(Item {
767 start: ix + 1,
768 end: ix + count + 1,
769 body: ItemBody::MaybeCode(count, true),
770 });
771 begin_text = ix + 1 + count;
772 backslash_escaped = false;
773 LoopInstruction::ContinueAndSkip(count)
774 } else if bytes[ix + 1] == b'|' && TableParseMode::Active == mode {
775 // Yeah, it's super weird that backslash escaped pipes in tables aren't "real"
776 // backslash escapes.
777 //
778 // This tree structure is intended for the benefit of inline analysis, and it
779 // is supposed to operate as-if backslash escaped pipes were stripped out in a
780 // separate pass.
781 begin_text = ix + 1;
782 backslash_escaped = false;
783 LoopInstruction::ContinueAndSkip(1)
784 } else if ix + 2 < bytes_len
785 && bytes[ix + 1] == b'\\'
786 && bytes[ix + 2] == b'|'
787 && TableParseMode::Active == mode
788 {
789 // To parse `\\|`, discard the backslashes and parse the `|` that follows it.
790 begin_text = ix + 2;
791 backslash_escaped = true;
792 LoopInstruction::ContinueAndSkip(2)
793 } else {
794 begin_text = ix + 1;
795 backslash_escaped = true;
796 LoopInstruction::ContinueAndSkip(1)
797 }
798 } else {
799 LoopInstruction::ContinueAndSkip(0)
800 }
801 }
802 c @ b'*' | c @ b'_' | c @ b'~' => {
803 let string_suffix = &self.text[ix..];
804 let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
805 let can_open = delim_run_can_open(
806 &self.text[start..],
807 string_suffix,
808 count,
809 ix - start,
810 mode,
811 );
812 let can_close = delim_run_can_close(
813 &self.text[start..],
814 string_suffix,
815 count,
816 ix - start,
817 mode,
818 );
819 let is_valid_seq = c != b'~' || count <= 2;
820
821 if (can_open || can_close) && is_valid_seq {
822 self.tree.append_text(begin_text, ix, backslash_escaped);
823 backslash_escaped = false;
824 for i in 0..count {
825 self.tree.append(Item {
826 start: ix + i,
827 end: ix + i + 1,
828 body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
829 });
830 }
831 begin_text = ix + count;
832 }
833 LoopInstruction::ContinueAndSkip(count - 1)
834 }
835 b'$' => {
836 let string_suffix = &self.text[ix..];
837 let can_open = !string_suffix[1..]
838 .as_bytes()
839 .first()
840 .copied()
841 .map_or(true, is_ascii_whitespace);
842 let can_close = ix > start
843 && !self.text[..ix]
844 .as_bytes()
845 .last()
846 .copied()
847 .map_or(true, is_ascii_whitespace);
848
849 // 0xFFFF_FFFF... represents the root brace context. Using None would require
850 // storing Option<u8>, which is bigger than u8.
851 //
852 // These shouldn't conflict unless you have 255 levels of nesting, which is
853 // past the intended limit anyway.
854 //
855 // Unbalanced braces will cause the root to be changed, which is why it gets
856 // stored here.
857 let brace_context =
858 if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
859 self.brace_context_next as u8
860 } else {
861 self.brace_context_stack.last().copied().unwrap_or_else(|| {
862 self.brace_context_stack.push(!0);
863 !0
864 })
865 };
866
867 self.tree.append_text(begin_text, ix, backslash_escaped);
868 self.tree.append(Item {
869 start: ix,
870 end: ix + 1,
871 body: ItemBody::MaybeMath(can_open, can_close, brace_context),
872 });
873 begin_text = ix + 1;
874 LoopInstruction::ContinueAndSkip(0)
875 }
876 b'{' => {
877 if self.brace_context_stack.len() == MATH_BRACE_CONTEXT_MAX_NESTING {
878 self.brace_context_stack.push(self.brace_context_next as u8);
879 self.brace_context_next = MATH_BRACE_CONTEXT_MAX_NESTING;
880 } else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
881 // When we reach the limit of nesting, switch from actually matching
882 // braces to just counting them.
883 self.brace_context_next += 1;
884 } else if !self.brace_context_stack.is_empty() {
885 // Store nothing if no math environment has been reached yet.
886 self.brace_context_stack.push(self.brace_context_next as u8);
887 self.brace_context_next += 1;
888 }
889 LoopInstruction::ContinueAndSkip(0)
890 }
891 b'}' => {
892 if let &mut [ref mut top_level_context] = &mut self.brace_context_stack[..] {
893 // Unbalanced Braces
894 //
895 // The initial, root top-level brace context is -1, but this is changed whenever an unbalanced
896 // close brace is encountered:
897 //
898 // This is not a math environment: $}$
899 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^|^
900 // -1 |-2
901 //
902 // To ensure this can't get parsed as math, each side of the unbalanced
903 // brace is an irreversibly separate brace context. As long as the math
904 // environment itself contains balanced braces, they should share a top level context.
905 //
906 // Math environment contains 2+2: $}$2+2$
907 // ^^^ this is a math environment
908 *top_level_context = top_level_context.wrapping_sub(1);
909 } else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
910 // When we exceed 25 levels of nesting, switch from accurately balancing braces
911 // to just counting them. When we dip back below the limit, switch back.
912 if self.brace_context_next <= MATH_BRACE_CONTEXT_MAX_NESTING {
913 self.brace_context_stack.pop();
914 } else {
915 self.brace_context_next -= 1;
916 }
917 } else {
918 self.brace_context_stack.pop();
919 }
920 LoopInstruction::ContinueAndSkip(0)
921 }
922 b'`' => {
923 self.tree.append_text(begin_text, ix, backslash_escaped);
924 backslash_escaped = false;
925 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
926 self.tree.append(Item {
927 start: ix,
928 end: ix + count,
929 body: ItemBody::MaybeCode(count, false),
930 });
931 begin_text = ix + count;
932 LoopInstruction::ContinueAndSkip(count - 1)
933 }
934 b'<' if bytes.get(ix + 1) != Some(&b'\\') => {
935 // Note: could detect some non-HTML cases and early escape here, but not
936 // clear that's a win.
937 self.tree.append_text(begin_text, ix, backslash_escaped);
938 backslash_escaped = false;
939 self.tree.append(Item {
940 start: ix,
941 end: ix + 1,
942 body: ItemBody::MaybeHtml,
943 });
944 begin_text = ix + 1;
945 LoopInstruction::ContinueAndSkip(0)
946 }
947 b'!' => {
948 if ix + 1 < bytes_len && bytes[ix + 1] == b'[' {
949 self.tree.append_text(begin_text, ix, backslash_escaped);
950 backslash_escaped = false;
951 self.tree.append(Item {
952 start: ix,
953 end: ix + 2,
954 body: ItemBody::MaybeImage,
955 });
956 begin_text = ix + 2;
957 LoopInstruction::ContinueAndSkip(1)
958 } else {
959 LoopInstruction::ContinueAndSkip(0)
960 }
961 }
962 b'[' => {
963 self.tree.append_text(begin_text, ix, backslash_escaped);
964 backslash_escaped = false;
965 self.tree.append(Item {
966 start: ix,
967 end: ix + 1,
968 body: ItemBody::MaybeLinkOpen,
969 });
970 begin_text = ix + 1;
971 LoopInstruction::ContinueAndSkip(0)
972 }
973 b']' => {
974 self.tree.append_text(begin_text, ix, backslash_escaped);
975 backslash_escaped = false;
976 self.tree.append(Item {
977 start: ix,
978 end: ix + 1,
979 body: ItemBody::MaybeLinkClose(true),
980 });
981 begin_text = ix + 1;
982 LoopInstruction::ContinueAndSkip(0)
983 }
984 b'&' => match scan_entity(&bytes[ix..]) {
985 (n, Some(value)) => {
986 self.tree.append_text(begin_text, ix, backslash_escaped);
987 backslash_escaped = false;
988 self.tree.append(Item {
989 start: ix,
990 end: ix + n,
991 body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
992 });
993 begin_text = ix + n;
994 LoopInstruction::ContinueAndSkip(n - 1)
995 }
996 _ => LoopInstruction::ContinueAndSkip(0),
997 },
998 b'|' => {
999 if ix != 0 && bytes[ix - 1] == b'\\' {
1000 LoopInstruction::ContinueAndSkip(0)
1001 } else if let TableParseMode::Active = mode {
1002 LoopInstruction::BreakAtWith(ix, None)
1003 } else {
1004 last_pipe_ix = ix;
1005 pipes += 1;
1006 LoopInstruction::ContinueAndSkip(0)
1007 }
1008 }
1009 b'.' => {
1010 if ix + 2 < bytes.len() && bytes[ix + 1] == b'.' && bytes[ix + 2] == b'.' {
1011 self.tree.append_text(begin_text, ix, backslash_escaped);
1012 backslash_escaped = false;
1013 self.tree.append(Item {
1014 start: ix,
1015 end: ix + 3,
1016 body: ItemBody::SynthesizeChar('…'),
1017 });
1018 begin_text = ix + 3;
1019 LoopInstruction::ContinueAndSkip(2)
1020 } else {
1021 LoopInstruction::ContinueAndSkip(0)
1022 }
1023 }
1024 b'-' => {
1025 let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'-');
1026 if count == 1 {
1027 LoopInstruction::ContinueAndSkip(0)
1028 } else {
1029 let itembody = if count == 2 {
1030 ItemBody::SynthesizeChar('–')
1031 } else if count == 3 {
1032 ItemBody::SynthesizeChar('—')
1033 } else {
1034 let (ems, ens) = match count % 6 {
1035 0 | 3 => (count / 3, 0),
1036 2 | 4 => (0, count / 2),
1037 1 => (count / 3 - 1, 2),
1038 _ => (count / 3, 1),
1039 };
1040 // – and — are 3 bytes each in utf8
1041 let mut buf = String::with_capacity(3 * (ems + ens));
1042 for _ in 0..ems {
1043 buf.push('—');
1044 }
1045 for _ in 0..ens {
1046 buf.push('–');
1047 }
1048 ItemBody::SynthesizeText(self.allocs.allocate_cow(buf.into()))
1049 };
1050
1051 self.tree.append_text(begin_text, ix, backslash_escaped);
1052 backslash_escaped = false;
1053 self.tree.append(Item {
1054 start: ix,
1055 end: ix + count,
1056 body: itembody,
1057 });
1058 begin_text = ix + count;
1059 LoopInstruction::ContinueAndSkip(count - 1)
1060 }
1061 }
1062 c @ b'\'' | c @ b'"' => {
1063 let string_suffix = &self.text[ix..];
1064 let can_open =
1065 delim_run_can_open(&self.text[start..], string_suffix, 1, ix - start, mode);
1066 let can_close = delim_run_can_close(
1067 &self.text[start..],
1068 string_suffix,
1069 1,
1070 ix - start,
1071 mode,
1072 );
1073
1074 self.tree.append_text(begin_text, ix, backslash_escaped);
1075 backslash_escaped = false;
1076 self.tree.append(Item {
1077 start: ix,
1078 end: ix + 1,
1079 body: ItemBody::MaybeSmartQuote(c, can_open, can_close),
1080 });
1081 begin_text = ix + 1;
1082
1083 LoopInstruction::ContinueAndSkip(0)
1084 }
1085 _ => LoopInstruction::ContinueAndSkip(0),
1086 }
1087 });
1088
1089 if brk.is_none() {
1090 let trailing_whitespace =
1091 scan_rev_while(&bytes[begin_text..final_ix], is_ascii_whitespace_no_nl);
1092 // need to close text at eof
1093 self.tree.append_text(
1094 begin_text,
1095 final_ix - trailing_whitespace,
1096 backslash_escaped,
1097 );
1098 }
1099 (final_ix, brk)
1100 }
1101
1102 /// When start_ix is at the beginning of an HTML block of type 1 to 5,
1103 /// this will find the end of the block, adding the block itself to the
1104 /// tree and also keeping track of the lines of HTML within the block.
1105 ///
1106 /// The html_end_tag is the tag that must be found on a line to end the block.
1107 fn parse_html_block_type_1_to_5(
1108 &mut self,
1109 start_ix: usize,
1110 html_end_tag: &str,
1111 mut remaining_space: usize,
1112 mut indent: usize,
1113 ) -> usize {
1114 self.tree.append(Item {
1115 start: start_ix,
1116 end: 0, // set later
1117 body: ItemBody::HtmlBlock,
1118 });
1119 self.tree.push();
1120
1121 let bytes = self.text.as_bytes();
1122 let mut ix = start_ix;
1123 let end_ix;
1124 loop {
1125 let line_start_ix = ix;
1126 ix += scan_nextline(&bytes[ix..]);
1127 self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1128
1129 let mut line_start = LineStart::new(&bytes[ix..]);
1130 let n_containers = scan_containers(
1131 &self.tree,
1132 &mut line_start,
1133 self.options.has_gfm_footnotes(),
1134 );
1135 if n_containers < self.tree.spine_len() {
1136 end_ix = ix;
1137 break;
1138 }
1139
1140 if self.text[line_start_ix..ix].contains(html_end_tag) {
1141 end_ix = ix;
1142 break;
1143 }
1144
1145 let next_line_ix = ix + line_start.bytes_scanned();
1146 if next_line_ix == self.text.len() {
1147 end_ix = next_line_ix;
1148 break;
1149 }
1150 ix = next_line_ix;
1151 remaining_space = line_start.remaining_space();
1152 indent = 0;
1153 }
1154 self.pop(end_ix);
1155 ix
1156 }
1157
1158 /// When start_ix is at the beginning of an HTML block of type 6 or 7,
1159 /// this will consume lines until there is a blank line and keep track of
1160 /// the HTML within the block.
1161 fn parse_html_block_type_6_or_7(
1162 &mut self,
1163 start_ix: usize,
1164 mut remaining_space: usize,
1165 mut indent: usize,
1166 ) -> usize {
1167 self.tree.append(Item {
1168 start: start_ix,
1169 end: 0, // set later
1170 body: ItemBody::HtmlBlock,
1171 });
1172 self.tree.push();
1173
1174 let bytes = self.text.as_bytes();
1175 let mut ix = start_ix;
1176 let end_ix;
1177 loop {
1178 let line_start_ix = ix;
1179 ix += scan_nextline(&bytes[ix..]);
1180 self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1181
1182 let mut line_start = LineStart::new(&bytes[ix..]);
1183 let n_containers = scan_containers(
1184 &self.tree,
1185 &mut line_start,
1186 self.options.has_gfm_footnotes(),
1187 );
1188 if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
1189 end_ix = ix;
1190 break;
1191 }
1192
1193 let next_line_ix = ix + line_start.bytes_scanned();
1194 if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
1195 {
1196 end_ix = next_line_ix;
1197 break;
1198 }
1199 ix = next_line_ix;
1200 remaining_space = line_start.remaining_space();
1201 indent = 0;
1202 }
1203 self.pop(end_ix);
1204 ix
1205 }
1206
1207 fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
1208 self.tree.append(Item {
1209 start: start_ix,
1210 end: 0, // will get set later
1211 body: ItemBody::IndentCodeBlock,
1212 });
1213 self.tree.push();
1214 let bytes = self.text.as_bytes();
1215 let mut last_nonblank_child = None;
1216 let mut last_nonblank_ix = 0;
1217 let mut end_ix = 0;
1218 self.last_line_blank = false;
1219
1220 let mut ix = start_ix;
1221 loop {
1222 let line_start_ix = ix;
1223 ix += scan_nextline(&bytes[ix..]);
1224 self.append_code_text(remaining_space, line_start_ix, ix);
1225 // TODO(spec clarification): should we synthesize newline at EOF?
1226
1227 if !self.last_line_blank {
1228 last_nonblank_child = self.tree.cur();
1229 last_nonblank_ix = ix;
1230 end_ix = ix;
1231 }
1232
1233 let mut line_start = LineStart::new(&bytes[ix..]);
1234 let n_containers = scan_containers(
1235 &self.tree,
1236 &mut line_start,
1237 self.options.has_gfm_footnotes(),
1238 );
1239 if n_containers < self.tree.spine_len()
1240 || !(line_start.scan_space(4) || line_start.is_at_eol())
1241 {
1242 break;
1243 }
1244 let next_line_ix = ix + line_start.bytes_scanned();
1245 if next_line_ix == self.text.len() {
1246 break;
1247 }
1248 ix = next_line_ix;
1249 remaining_space = line_start.remaining_space();
1250 self.last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1251 }
1252
1253 // Trim trailing blank lines.
1254 if let Some(child) = last_nonblank_child {
1255 self.tree[child].next = None;
1256 self.tree[child].item.end = last_nonblank_ix;
1257 }
1258 self.pop(end_ix);
1259 ix
1260 }
1261
1262 fn parse_fenced_code_block(
1263 &mut self,
1264 start_ix: usize,
1265 indent: usize,
1266 fence_ch: u8,
1267 n_fence_char: usize,
1268 ) -> usize {
1269 let bytes = self.text.as_bytes();
1270 let mut info_start = start_ix + n_fence_char;
1271 info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1272 // TODO: info strings are typically very short. wouldn't it be faster
1273 // to just do a forward scan here?
1274 let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1275 let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1276 let info_string = unescape(&self.text[info_start..info_end], self.tree.is_in_table());
1277 self.tree.append(Item {
1278 start: start_ix,
1279 end: 0, // will get set later
1280 body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1281 });
1282 self.tree.push();
1283 loop {
1284 let mut line_start = LineStart::new(&bytes[ix..]);
1285 let n_containers = scan_containers(
1286 &self.tree,
1287 &mut line_start,
1288 self.options.has_gfm_footnotes(),
1289 );
1290 if n_containers < self.tree.spine_len() {
1291 // this line will get parsed again as not being part of the code
1292 // if it's blank, it should be parsed as a blank line
1293 self.pop(ix);
1294 return ix;
1295 }
1296 line_start.scan_space(indent);
1297 let mut close_line_start = line_start.clone();
1298 if !close_line_start.scan_space(4 - indent) {
1299 let close_ix = ix + close_line_start.bytes_scanned();
1300 if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1301 {
1302 ix = close_ix + n;
1303 self.pop(ix);
1304 // try to read trailing whitespace or it will register as a completely blank line
1305 return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
1306 }
1307 }
1308 let remaining_space = line_start.remaining_space();
1309 ix += line_start.bytes_scanned();
1310 let next_ix = ix + scan_nextline(&bytes[ix..]);
1311 self.append_code_text(remaining_space, ix, next_ix);
1312 ix = next_ix;
1313 }
1314 }
1315
1316 fn parse_metadata_block(&mut self, start_ix: usize, metadata_block_ch: u8) -> usize {
1317 let bytes = self.text.as_bytes();
1318 let metadata_block_kind = match metadata_block_ch {
1319 b'-' => MetadataBlockKind::YamlStyle,
1320 b'+' => MetadataBlockKind::PlusesStyle,
1321 _ => panic!("Erroneous metadata block character when parsing metadata block"),
1322 };
1323 // 3 delimiter characters
1324 let mut ix = start_ix + 3 + scan_nextline(&bytes[start_ix + 3..]);
1325 self.tree.append(Item {
1326 start: start_ix,
1327 end: 0, // will get set later
1328 body: ItemBody::MetadataBlock(metadata_block_kind),
1329 });
1330 self.tree.push();
1331 loop {
1332 let mut line_start = LineStart::new(&bytes[ix..]);
1333 let n_containers = scan_containers(
1334 &self.tree,
1335 &mut line_start,
1336 self.options.has_gfm_footnotes(),
1337 );
1338 if n_containers < self.tree.spine_len() {
1339 break;
1340 }
1341 if let (_, 0) = calc_indent(&bytes[ix..], 4) {
1342 if let Some(n) = scan_closing_metadata_block(&bytes[ix..], metadata_block_ch) {
1343 ix += n;
1344 break;
1345 }
1346 }
1347 let remaining_space = line_start.remaining_space();
1348 ix += line_start.bytes_scanned();
1349 let next_ix = ix + scan_nextline(&bytes[ix..]);
1350 self.append_code_text(remaining_space, ix, next_ix);
1351 ix = next_ix;
1352 }
1353
1354 self.pop(ix);
1355
1356 // try to read trailing whitespace or it will register as a completely blank line
1357 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1358 }
1359
1360 fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1361 if remaining_space > 0 {
1362 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1363 self.tree.append(Item {
1364 start,
1365 end: start,
1366 body: ItemBody::SynthesizeText(cow_ix),
1367 });
1368 }
1369 if self.text.as_bytes()[end - 2] == b'\r' {
1370 // Normalize CRLF to LF
1371 self.tree.append_text(start, end - 2, false);
1372 self.tree.append_text(end - 1, end, false);
1373 } else {
1374 self.tree.append_text(start, end, false);
1375 }
1376 }
1377
1378 /// Appends a line of HTML to the tree.
1379 fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1380 if remaining_space > 0 {
1381 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1382 self.tree.append(Item {
1383 start,
1384 end: start,
1385 body: ItemBody::SynthesizeText(cow_ix),
1386 });
1387 }
1388 if self.text.as_bytes()[end - 2] == b'\r' {
1389 // Normalize CRLF to LF
1390 self.tree.append(Item {
1391 start,
1392 end: end - 2,
1393 body: ItemBody::Html,
1394 });
1395 self.tree.append(Item {
1396 start: end - 1,
1397 end,
1398 body: ItemBody::Html,
1399 });
1400 } else {
1401 self.tree.append(Item {
1402 start,
1403 end,
1404 body: ItemBody::Html,
1405 });
1406 }
1407 }
1408
1409 /// Pop a container, setting its end.
1410 fn pop(&mut self, ix: usize) {
1411 let cur_ix = self.tree.pop().unwrap();
1412 self.tree[cur_ix].item.end = ix;
1413 if let ItemBody::List(true, _, _) = self.tree[cur_ix].item.body {
1414 surgerize_tight_list(&mut self.tree, cur_ix);
1415 self.begin_list_item = None;
1416 }
1417 }
1418
1419 /// Close a list if it's open. Also set loose if last line was blank
1420 /// and end current list if it's a lone, empty item
1421 fn finish_list(&mut self, ix: usize) {
1422 self.finish_empty_list_item();
1423 if let Some(node_ix) = self.tree.peek_up() {
1424 if let ItemBody::List(_, _, _) = self.tree[node_ix].item.body {
1425 self.pop(ix);
1426 }
1427 }
1428 if self.last_line_blank {
1429 if let Some(node_ix) = self.tree.peek_grandparent() {
1430 if let ItemBody::List(ref mut is_tight, _, _) = self.tree[node_ix].item.body {
1431 *is_tight = false;
1432 }
1433 }
1434 self.last_line_blank = false;
1435 }
1436 }
1437
1438 fn finish_empty_list_item(&mut self) {
1439 if let Some(begin_list_item) = self.begin_list_item {
1440 if self.last_line_blank {
1441 // A list item can begin with at most one blank line.
1442 if let Some(node_ix) = self.tree.peek_up() {
1443 if let ItemBody::ListItem(_) = self.tree[node_ix].item.body {
1444 self.pop(begin_list_item);
1445 }
1446 }
1447 }
1448 }
1449 self.begin_list_item = None;
1450 }
1451
1452 /// Continue an existing list or start a new one if there's not an open
1453 /// list that matches.
1454 fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1455 self.finish_empty_list_item();
1456 if let Some(node_ix) = self.tree.peek_up() {
1457 if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1458 if existing_ch == ch {
1459 if self.last_line_blank {
1460 *is_tight = false;
1461 self.last_line_blank = false;
1462 }
1463 return;
1464 }
1465 }
1466 // TODO: this is not the best choice for end; maybe get end from last list item.
1467 self.finish_list(start);
1468 }
1469 self.tree.append(Item {
1470 start,
1471 end: 0, // will get set later
1472 body: ItemBody::List(true, ch, index),
1473 });
1474 self.tree.push();
1475 self.last_line_blank = false;
1476 }
1477
1478 /// Parse a thematic break.
1479 ///
1480 /// Returns index of start of next line.
1481 fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1482 self.tree.append(Item {
1483 start: ix,
1484 end: ix + hrule_size,
1485 body: ItemBody::Rule,
1486 });
1487 ix + hrule_size
1488 }
1489
1490 /// Parse an ATX heading.
1491 ///
1492 /// Returns index of start of next line.
1493 fn parse_atx_heading(&mut self, start: usize, atx_level: HeadingLevel) -> usize {
1494 let mut ix = start;
1495 let heading_ix = self.tree.append(Item {
1496 start,
1497 end: 0, // set later
1498 body: ItemBody::default(), // set later
1499 });
1500 ix += atx_level as usize;
1501 // next char is space or eol (guaranteed by scan_atx_heading)
1502 let bytes = self.text.as_bytes();
1503 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1504 self.tree[heading_ix].item.end = ix + eol_bytes;
1505 self.tree[heading_ix].item.body = ItemBody::Heading(atx_level, None);
1506 return ix + eol_bytes;
1507 }
1508 // skip leading spaces
1509 let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1510 ix += skip_spaces;
1511
1512 // now handle the header text
1513 let header_start = ix;
1514 let header_node_idx = self.tree.push(); // so that we can set the endpoint later
1515
1516 // trim the trailing attribute block before parsing the entire line, if necessary
1517 let (end, content_end, attrs) = if self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES)
1518 {
1519 // the start of the next line is the end of the header since the
1520 // header cannot have line breaks
1521 let header_end = header_start + scan_nextline(&bytes[header_start..]);
1522 let (content_end, attrs) =
1523 self.extract_and_parse_heading_attribute_block(header_start, header_end);
1524 self.parse_line(ix, Some(content_end), TableParseMode::Disabled);
1525 (header_end, content_end, attrs)
1526 } else {
1527 let (line_ix, line_brk) = self.parse_line(ix, None, TableParseMode::Disabled);
1528 ix = line_ix;
1529 // Backslash at end is actually hard line break
1530 if let Some(Item {
1531 start,
1532 end,
1533 body: ItemBody::HardBreak(true),
1534 }) = line_brk
1535 {
1536 self.tree.append_text(start, end, false);
1537 }
1538 (ix, ix, None)
1539 };
1540 self.tree[header_node_idx].item.end = end;
1541
1542 // remove trailing matter from header text
1543 let mut empty_text_node = false;
1544 if let Some(cur_ix) = self.tree.cur() {
1545 // remove closing of the ATX heading
1546 let header_text = &bytes[header_start..content_end];
1547 let mut limit = header_text
1548 .iter()
1549 .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1550 .map_or(0, |i| i + 1);
1551 let closer = header_text[..limit]
1552 .iter()
1553 .rposition(|&b| b != b'#')
1554 .map_or(0, |i| i + 1);
1555 if closer == 0 {
1556 limit = closer;
1557 } else {
1558 let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1559 if spaces > 0 {
1560 limit = closer - spaces;
1561 }
1562 }
1563 // if text is only spaces, then remove them
1564 self.tree[cur_ix].item.end = limit + header_start;
1565
1566 // limit = 0 when text is empty after removing spaces
1567 if limit == 0 {
1568 empty_text_node = true;
1569 }
1570 }
1571
1572 if empty_text_node {
1573 self.tree.remove_node();
1574 } else {
1575 self.tree.pop();
1576 }
1577 self.tree[heading_ix].item.body = ItemBody::Heading(
1578 atx_level,
1579 attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
1580 );
1581
1582 end
1583 }
1584
1585 /// Returns the number of bytes scanned on success.
1586 fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1587 let bytes = &self.text.as_bytes()[start..];
1588 if !bytes.starts_with(b"[^") {
1589 return None;
1590 }
1591 let (mut i, label) = if self.options.has_gfm_footnotes() {
1592 // GitHub doesn't allow footnote definition labels to contain line breaks.
1593 // It actually does allow this for link definitions under certain circumstances,
1594 // but for this it's simpler to avoid it.
1595 scan_link_label_rest(&self.text[start + 2..], &|_| None, self.tree.is_in_table())?
1596 } else {
1597 self.parse_refdef_label(start + 2)?
1598 };
1599 if self.options.has_gfm_footnotes() && label.bytes().any(|b| b == b'\r' || b == b'\n') {
1600 // GitHub doesn't allow footnote definition labels to contain line breaks,
1601 // even if they're escaped.
1602 return None;
1603 }
1604 i += 2;
1605 if scan_ch(&bytes[i..], b':') == 0 {
1606 return None;
1607 }
1608 i += 1;
1609 self.finish_list(start);
1610 if let Some(node_ix) = self.tree.peek_up() {
1611 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
1612 // finish previous footnote if it's still open
1613 self.pop(start);
1614 }
1615 }
1616 if self.options.has_gfm_footnotes() {
1617 i += scan_whitespace_no_nl(&bytes[i..]);
1618 }
1619 self.allocs.footdefs.0.insert(
1620 UniCase::new(label.clone()),
1621 FootnoteDef { use_count: 0 },
1622 );
1623 self.tree.append(Item {
1624 start,
1625 end: 0, // will get set later
1626 // TODO: check whether the label here is strictly necessary
1627 body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1628 });
1629 self.tree.push();
1630 Some(i)
1631 }
1632
1633 /// Tries to parse a reference label, which can be interrupted by new blocks.
1634 /// On success, returns the number of bytes of the label and the label itself.
1635 fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1636 scan_link_label_rest(
1637 &self.text[start..],
1638 &|bytes| {
1639 let mut line_start = LineStart::new(bytes);
1640 let current_container = scan_containers(
1641 &self.tree,
1642 &mut line_start,
1643 self.options.has_gfm_footnotes(),
1644 ) == self.tree.spine_len();
1645 if line_start.scan_space(4) {
1646 return Some(line_start.bytes_scanned());
1647 }
1648 let bytes_scanned = line_start.bytes_scanned();
1649 let suffix = &bytes[bytes_scanned..];
1650 if self.scan_paragraph_interrupt(suffix, current_container)
1651 || (current_container && scan_setext_heading(suffix).is_some())
1652 {
1653 None
1654 } else {
1655 Some(bytes_scanned)
1656 }
1657 },
1658 self.tree.is_in_table(),
1659 )
1660 }
1661
1662 /// Returns number of bytes scanned, label and definition on success.
1663 fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1664 let bytes = &self.text.as_bytes()[start..];
1665 if scan_ch(bytes, b'[') == 0 {
1666 return None;
1667 }
1668 let (mut i, label) = self.parse_refdef_label(start + 1)?;
1669 i += 1;
1670 if scan_ch(&bytes[i..], b':') == 0 {
1671 return None;
1672 }
1673 i += 1;
1674 let (bytecount, link_def) = self.scan_refdef(start, start + i)?;
1675 Some((bytecount + i, UniCase::new(label), link_def))
1676 }
1677
1678 /// Returns number of bytes and number of newlines
1679 fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1680 let mut newlines = 0;
1681 loop {
1682 let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1683 i += whitespaces;
1684 if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1685 i += eol_bytes;
1686 newlines += 1;
1687 if newlines > 1 {
1688 return None;
1689 }
1690 } else {
1691 break;
1692 }
1693 let mut line_start = LineStart::new(&bytes[i..]);
1694 let current_container = scan_containers(
1695 &self.tree,
1696 &mut line_start,
1697 self.options.has_gfm_footnotes(),
1698 ) == self.tree.spine_len();
1699 if !line_start.scan_space(4) {
1700 let suffix = &bytes[i + line_start.bytes_scanned()..];
1701 if self.scan_paragraph_interrupt(suffix, current_container)
1702 || scan_setext_heading(suffix).is_some()
1703 {
1704 return None;
1705 }
1706 }
1707 i += line_start.bytes_scanned();
1708 }
1709 Some((i, newlines))
1710 }
1711
1712 // returns (bytelength, title_str)
1713 fn scan_refdef_title<'t>(&self, text: &'t str) -> Option<(usize, CowStr<'t>)> {
1714 let bytes = text.as_bytes();
1715 let closing_delim = match bytes.first()? {
1716 b'\'' => b'\'',
1717 b'"' => b'"',
1718 b'(' => b')',
1719 _ => return None,
1720 };
1721 let mut bytecount = 1;
1722 let mut linestart = 1;
1723
1724 let mut linebuf = None;
1725
1726 while let Some(&c) = bytes.get(bytecount) {
1727 match c {
1728 b'(' if closing_delim == b')' => {
1729 // https://spec.commonmark.org/0.30/#link-title
1730 // a sequence of zero or more characters between matching parentheses ((...)),
1731 // including a ( or ) character only if it is backslash-escaped.
1732 return None;
1733 }
1734 b'\n' | b'\r' => {
1735 // push text to line buffer
1736 // this is used to strip the block formatting:
1737 //
1738 // > [first]: http://example.com "
1739 // > second"
1740 //
1741 // should get turned into `<a href="example.com" title=" second">first</a>`
1742 let linebuf = if let Some(linebuf) = &mut linebuf {
1743 linebuf
1744 } else {
1745 linebuf = Some(String::new());
1746 linebuf.as_mut().unwrap()
1747 };
1748 linebuf.push_str(&text[linestart..bytecount]);
1749 linebuf.push('\n'); // normalize line breaks
1750 // skip line break
1751 bytecount += 1;
1752 if c == b'\r' && bytes.get(bytecount) == Some(&b'\n') {
1753 bytecount += 1;
1754 }
1755 let mut line_start = LineStart::new(&bytes[bytecount..]);
1756 let current_container = scan_containers(
1757 &self.tree,
1758 &mut line_start,
1759 self.options.has_gfm_footnotes(),
1760 ) == self.tree.spine_len();
1761 if !line_start.scan_space(4) {
1762 let suffix = &bytes[bytecount + line_start.bytes_scanned()..];
1763 if self.scan_paragraph_interrupt(suffix, current_container)
1764 || scan_setext_heading(suffix).is_some()
1765 {
1766 return None;
1767 }
1768 }
1769 line_start.scan_all_space();
1770 bytecount += line_start.bytes_scanned();
1771 linestart = bytecount;
1772 if scan_blank_line(&bytes[bytecount..]).is_some() {
1773 // blank line - not allowed
1774 return None;
1775 }
1776 }
1777 b'\\' => {
1778 bytecount += 1;
1779 if let Some(c) = bytes.get(bytecount) {
1780 if c != &b'\r' && c != &b'\n' {
1781 bytecount += 1;
1782 }
1783 }
1784 }
1785 c if c == closing_delim => {
1786 let cow = if let Some(mut linebuf) = linebuf {
1787 linebuf.push_str(&text[linestart..bytecount]);
1788 CowStr::from(linebuf)
1789 } else {
1790 CowStr::from(&text[linestart..bytecount])
1791 };
1792 return Some((bytecount + 1, cow));
1793 }
1794 _ => {
1795 bytecount += 1;
1796 }
1797 }
1798 }
1799 None
1800 }
1801
1802 /// Returns # of bytes and definition.
1803 /// Assumes the label of the reference including colon has already been scanned.
1804 fn scan_refdef(&self, span_start: usize, start: usize) -> Option<(usize, LinkDef<'a>)> {
1805 let bytes = self.text.as_bytes();
1806
1807 // whitespace between label and url (including up to one newline)
1808 let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1809
1810 // scan link dest
1811 let (dest_length, dest) = scan_link_dest(self.text, i, LINK_MAX_NESTED_PARENS)?;
1812 if dest_length == 0 {
1813 return None;
1814 }
1815 let dest = unescape(dest, self.tree.is_in_table());
1816 i += dest_length;
1817
1818 // no title
1819 let mut backup = (
1820 i - start,
1821 LinkDef {
1822 dest,
1823 title: None,
1824 span: span_start..i,
1825 },
1826 );
1827
1828 // scan whitespace between dest and label
1829 let (mut i, newlines) =
1830 if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1831 if i == self.text.len() {
1832 newlines += 1;
1833 }
1834 if new_i == i && newlines == 0 {
1835 return None;
1836 }
1837 if newlines > 1 {
1838 return Some(backup);
1839 };
1840 (new_i, newlines)
1841 } else {
1842 return Some(backup);
1843 };
1844
1845 // scan title
1846 // if this fails but newline == 1, return also a refdef without title
1847 if let Some((title_length, title)) = self.scan_refdef_title(&self.text[i..]) {
1848 i += title_length;
1849 if scan_blank_line(&bytes[i..]).is_some() {
1850 backup.0 = i - start;
1851 backup.1.span = span_start..i;
1852 backup.1.title = Some(unescape(title, self.tree.is_in_table()));
1853 return Some(backup);
1854 }
1855 }
1856 if newlines > 0 {
1857 Some(backup)
1858 } else {
1859 None
1860 }
1861 }
1862
1863 /// Checks whether we should break a paragraph on the given input.
1864 fn scan_paragraph_interrupt(&self, bytes: &[u8], current_container: bool) -> bool {
1865 let has_footnote = self.options.contains(Options::ENABLE_FOOTNOTES);
1866 if scan_paragraph_interrupt_no_table(bytes, current_container, has_footnote, &self.tree) {
1867 return true;
1868 }
1869 // pulldown-cmark allows heavy tables, that have a `|` on the header row,
1870 // to interrupt paragraphs.
1871 //
1872 // ```markdown
1873 // This is a table
1874 // | a | b | c |
1875 // |---|---|---|
1876 // | d | e | f |
1877 //
1878 // This is not a table
1879 // a | b | c
1880 // ---|---|---
1881 // d | e | f
1882 // ```
1883 if !self.options.contains(Options::ENABLE_TABLES) || !bytes.starts_with(b"|") {
1884 return false;
1885 }
1886
1887 // Checking if something's a valid table or not requires looking at two lines.
1888 // First line, count unescaped pipes.
1889 let mut pipes = 0;
1890 let mut next_line_ix = 0;
1891 let mut bsesc = false;
1892 let mut last_pipe_ix = 0;
1893 for (i, &byte) in bytes.iter().enumerate() {
1894 match byte {
1895 b'\\' => {
1896 bsesc = true;
1897 continue;
1898 }
1899 b'|' if !bsesc => {
1900 pipes += 1;
1901 last_pipe_ix = i;
1902 }
1903 b'\r' | b'\n' => {
1904 next_line_ix = i + scan_eol(&bytes[i..]).unwrap();
1905 break;
1906 }
1907 _ => {}
1908 }
1909 bsesc = false;
1910 }
1911
1912 // scan_eol can't return 0, so this can't be zero
1913 if next_line_ix == 0 {
1914 return false;
1915 }
1916
1917 // Scan the table head. The part that looks like:
1918 //
1919 // |---|---|---|
1920 //
1921 // Also scan any containing items, since it's on its own line, and
1922 // might be nested inside a block quote or something
1923 //
1924 // > Table: First
1925 // > | first col | second col |
1926 // > |-----------|------------|
1927 // ^
1928 // | need to skip over the `>` when checking for the table
1929 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
1930 if scan_containers(
1931 &self.tree,
1932 &mut line_start,
1933 self.options.has_gfm_footnotes(),
1934 ) != self.tree.spine_len()
1935 {
1936 return false;
1937 }
1938 let table_head_ix = next_line_ix + line_start.bytes_scanned();
1939 let (table_head_bytes, alignment) = scan_table_head(&bytes[table_head_ix..]);
1940
1941 if table_head_bytes == 0 {
1942 return false;
1943 }
1944
1945 // computing header count from number of pipes
1946 let header_count = count_header_cols(bytes, pipes, 0, last_pipe_ix);
1947
1948 // make sure they match the number of columns we find in separator line
1949 alignment.len() == header_count
1950 }
1951
1952 /// Extracts and parses a heading attribute block if exists.
1953 ///
1954 /// Returns `(end_offset_of_heading_content, (id, classes))`.
1955 ///
1956 /// If `header_end` is less than or equal to `header_start`, the given
1957 /// input is considered as empty.
1958 fn extract_and_parse_heading_attribute_block(
1959 &mut self,
1960 header_start: usize,
1961 header_end: usize,
1962 ) -> (usize, Option<HeadingAttributes<'a>>) {
1963 if !self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) {
1964 return (header_end, None);
1965 }
1966
1967 // extract the trailing attribute block
1968 let header_bytes = &self.text.as_bytes()[header_start..header_end];
1969 let (content_len, attr_block_range_rel) =
1970 extract_attribute_block_content_from_header_text(header_bytes);
1971 let content_end = header_start + content_len;
1972 let attrs = attr_block_range_rel.and_then(|r| {
1973 parse_inside_attribute_block(
1974 &self.text[(header_start + r.start)..(header_start + r.end)],
1975 )
1976 });
1977 (content_end, attrs)
1978 }
1979}
1980
1981/// Scanning modes for `Parser`'s `parse_line` method.
1982#[derive(PartialEq, Eq, Copy, Clone)]
1983enum TableParseMode {
1984 /// Inside a paragraph, scanning for table headers.
1985 Scan,
1986 /// Inside a table.
1987 Active,
1988 /// Inside a paragraph, not scanning for table headers.
1989 Disabled,
1990}
1991
1992/// Computes the number of header columns in a table line by computing the number of dividing pipes
1993/// that aren't followed or preceded by whitespace.
1994fn count_header_cols(
1995 bytes: &[u8],
1996 mut pipes: usize,
1997 mut start: usize,
1998 last_pipe_ix: usize,
1999) -> usize {
2000 // was first pipe preceded by whitespace? if so, subtract one
2001 start += scan_whitespace_no_nl(&bytes[start..]);
2002 if bytes[start] == b'|' {
2003 pipes -= 1;
2004 }
2005
2006 // was last pipe followed by whitespace? if so, sub one
2007 if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
2008 pipes
2009 } else {
2010 pipes + 1
2011 }
2012}
2013
2014/// Checks whether we should break a paragraph on the given input.
2015///
2016/// Use `FirstPass::scan_paragraph_interrupt` in any context that allows
2017/// tables to interrupt the paragraph.
2018fn scan_paragraph_interrupt_no_table(
2019 bytes: &[u8],
2020 current_container: bool,
2021 has_footnote: bool,
2022 tree: &Tree<Item>,
2023) -> bool {
2024 scan_eol(bytes).is_some()
2025 || scan_hrule(bytes).is_ok()
2026 || scan_atx_heading(bytes).is_some()
2027 || scan_code_fence(bytes).is_some()
2028 || scan_blockquote_start(bytes).is_some()
2029 || scan_listitem(bytes).map_or(false, |(ix, delim, index, _)| {
2030 ! current_container ||
2031 tree.is_in_table() ||
2032 // we don't allow interruption by either empty lists or
2033 // numbered lists starting at an index other than 1
2034 (delim == b'*' || delim == b'-' || delim == b'+' || index == 1)
2035 && (scan_blank_line(&bytes[ix..]).is_none())
2036 })
2037 || bytes.starts_with(b"<")
2038 && (get_html_end_tag(&bytes[1..]).is_some() || starts_html_block_type_6(&bytes[1..]))
2039 || (has_footnote
2040 && bytes.starts_with(b"[^")
2041 && scan_link_label_rest(
2042 std::str::from_utf8(&bytes[2..]).unwrap(),
2043 &|_| None,
2044 tree.is_in_table(),
2045 )
2046 .map_or(false, |(len, _)| bytes.get(2 + len) == Some(&b':')))
2047}
2048
2049/// Assumes `text_bytes` is preceded by `<`.
2050fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
2051 static BEGIN_TAGS: &[&[u8]; 4] = &[b"pre", b"style", b"script", b"textarea"];
2052 static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
2053
2054 for (beg_tag, end_tag) in BEGIN_TAGS
2055 .iter()
2056 .zip(["</pre>", "</style>", "</script>", "</textarea>"].iter())
2057 {
2058 let tag_len = beg_tag.len();
2059
2060 if text_bytes.len() < tag_len {
2061 // begin tags are increasing in size
2062 break;
2063 }
2064
2065 if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
2066 continue;
2067 }
2068
2069 // Must either be the end of the line...
2070 if text_bytes.len() == tag_len {
2071 return Some(end_tag);
2072 }
2073
2074 // ...or be followed by whitespace, newline, or '>'.
2075 let s = text_bytes[tag_len];
2076 if is_ascii_whitespace(s) || s == b'>' {
2077 return Some(end_tag);
2078 }
2079 }
2080
2081 for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
2082 if text_bytes.starts_with(beg_tag) {
2083 return Some(end_tag);
2084 }
2085 }
2086
2087 if text_bytes.len() > 1 && text_bytes[0] == b'!' && text_bytes[1].is_ascii_alphabetic() {
2088 Some(">")
2089 } else {
2090 None
2091 }
2092}
2093
2094// https://english.stackexchange.com/a/285573
2095fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
2096 let mut list_item = tree[list_ix].child;
2097 while let Some(listitem_ix) = list_item {
2098 // first child is special, controls how we repoint list_item.child
2099 let list_item_firstborn = tree[listitem_ix].child;
2100
2101 // Check that list item has children - this is not necessarily the case!
2102 if let Some(firstborn_ix) = list_item_firstborn {
2103 if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2104 tree[listitem_ix].child = tree[firstborn_ix].child;
2105 }
2106
2107 let mut list_item_child = Some(firstborn_ix);
2108 let mut node_to_repoint = None;
2109 while let Some(child_ix) = list_item_child {
2110 // surgerize paragraphs
2111 let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2112 if let Some(child_firstborn) = tree[child_ix].child {
2113 if let Some(repoint_ix) = node_to_repoint {
2114 tree[repoint_ix].next = Some(child_firstborn);
2115 }
2116 let mut child_lastborn = child_firstborn;
2117 while let Some(lastborn_next_ix) = tree[child_lastborn].next {
2118 child_lastborn = lastborn_next_ix;
2119 }
2120 child_lastborn
2121 } else {
2122 child_ix
2123 }
2124 } else {
2125 child_ix
2126 };
2127
2128 node_to_repoint = Some(repoint_ix);
2129 tree[repoint_ix].next = tree[child_ix].next;
2130 list_item_child = tree[child_ix].next;
2131 }
2132 }
2133
2134 list_item = tree[listitem_ix].next;
2135 }
2136}
2137
2138/// Determines whether the delimiter run starting at given index is
2139/// left-flanking, as defined by the commonmark spec (and isn't intraword
2140/// for _ delims).
2141/// suffix is &s[ix..], which is passed in as an optimization, since taking
2142/// a string subslice is O(n).
2143fn delim_run_can_open(
2144 s: &str,
2145 suffix: &str,
2146 run_len: usize,
2147 ix: usize,
2148 mode: TableParseMode,
2149) -> bool {
2150 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
2151 c
2152 } else {
2153 return false;
2154 };
2155 if next_char.is_whitespace() {
2156 return false;
2157 }
2158 if ix == 0 {
2159 return true;
2160 }
2161 if mode == TableParseMode::Active {
2162 if s[..ix].ends_with('|') && !s[..ix].ends_with(r"\|") {
2163 return true;
2164 }
2165 if next_char == '|' {
2166 return false;
2167 }
2168 }
2169 let delim = suffix.chars().next().unwrap();
2170 // `*` and `~~` can be intraword, `_` and `~` cannot
2171 if delim == '*' && !is_punctuation(next_char) {
2172 return true;
2173 }
2174 if delim == '~' && run_len > 1 {
2175 return true;
2176 }
2177 let prev_char = s[..ix].chars().last().unwrap();
2178 if delim == '~' && prev_char == '~' && !is_punctuation(next_char) {
2179 return true;
2180 }
2181
2182 prev_char.is_whitespace()
2183 || is_punctuation(prev_char) && (delim != '\'' || ![']', ')'].contains(&prev_char))
2184}
2185
2186/// Determines whether the delimiter run starting at given index is
2187/// right-flanking, as defined by the commonmark spec (and isn't intraword
2188/// for _ delims)
2189fn delim_run_can_close(
2190 s: &str,
2191 suffix: &str,
2192 run_len: usize,
2193 ix: usize,
2194 mode: TableParseMode,
2195) -> bool {
2196 if ix == 0 {
2197 return false;
2198 }
2199 let prev_char = s[..ix].chars().last().unwrap();
2200 if prev_char.is_whitespace() {
2201 return false;
2202 }
2203 let next_char = if let Some(c) = suffix.chars().nth(run_len) {
2204 c
2205 } else {
2206 return true;
2207 };
2208 if mode == TableParseMode::Active {
2209 if s[..ix].ends_with('|') && !s[..ix].ends_with(r"\|") {
2210 return false;
2211 }
2212 if next_char == '|' {
2213 return true;
2214 }
2215 }
2216 let delim = suffix.chars().next().unwrap();
2217 // `*` and `~~` can be intraword, `_` and `~` cannot
2218 if (delim == '*' || (delim == '~' && run_len > 1)) && !is_punctuation(prev_char) {
2219 return true;
2220 }
2221 if delim == '~' && prev_char == '~' {
2222 return true;
2223 }
2224
2225 next_char.is_whitespace() || is_punctuation(next_char)
2226}
2227
2228fn create_lut(options: &Options) -> LookupTable {
2229 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2230 {
2231 LookupTable {
2232 simd: simd::compute_lookup(options),
2233 scalar: special_bytes(options),
2234 }
2235 }
2236 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2237 {
2238 special_bytes(options)
2239 }
2240}
2241
2242fn special_bytes(options: &Options) -> [bool; 256] {
2243 let mut bytes = [false; 256];
2244 let standard_bytes = [
2245 b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
2246 ];
2247
2248 for &byte in &standard_bytes {
2249 bytes[byte as usize] = true;
2250 }
2251 if options.contains(Options::ENABLE_TABLES) {
2252 bytes[b'|' as usize] = true;
2253 }
2254 if options.contains(Options::ENABLE_STRIKETHROUGH) {
2255 bytes[b'~' as usize] = true;
2256 }
2257 if options.contains(Options::ENABLE_MATH) {
2258 bytes[b'$' as usize] = true;
2259 bytes[b'{' as usize] = true;
2260 bytes[b'}' as usize] = true;
2261 }
2262 if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2263 for &byte in &[b'.', b'-', b'"', b'\''] {
2264 bytes[byte as usize] = true;
2265 }
2266 }
2267
2268 bytes
2269}
2270
2271enum LoopInstruction<T> {
2272 /// Continue looking for more special bytes, but skip next few bytes.
2273 ContinueAndSkip(usize),
2274 /// Break looping immediately, returning with the given index and value.
2275 BreakAtWith(usize, T),
2276}
2277
2278#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2279struct LookupTable {
2280 simd: [u8; 16],
2281 scalar: [bool; 256],
2282}
2283
2284#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2285type LookupTable = [bool; 256];
2286
2287/// This function walks the byte slices from the given index and
2288/// calls the callback function on all bytes (and their indices) that are in the following set:
2289/// `` ` ``, `\`, `&`, `*`, `_`, `~`, `!`, `<`, `[`, `]`, `|`, `\r`, `\n`
2290/// It is guaranteed not call the callback on other bytes.
2291/// Whenever `callback(ix, byte)` returns a `ContinueAndSkip(n)` value, the callback
2292/// will not be called with an index that is less than `ix + n + 1`.
2293/// When the callback returns a `BreakAtWith(end_ix, opt+val)`, no more callbacks will be
2294/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
2295/// If `BreakAtWith(..)` is never returned, this function will return the first
2296/// index that is outside the byteslice bound and a `None` value.
2297fn iterate_special_bytes<F, T>(
2298 lut: &LookupTable,
2299 bytes: &[u8],
2300 ix: usize,
2301 callback: F,
2302) -> (usize, Option<T>)
2303where
2304 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2305{
2306 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2307 {
2308 simd::iterate_special_bytes(lut, bytes, ix, callback)
2309 }
2310 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2311 {
2312 scalar_iterate_special_bytes(lut, bytes, ix, callback)
2313 }
2314}
2315
2316fn scalar_iterate_special_bytes<F, T>(
2317 lut: &[bool; 256],
2318 bytes: &[u8],
2319 mut ix: usize,
2320 mut callback: F,
2321) -> (usize, Option<T>)
2322where
2323 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2324{
2325 while ix < bytes.len() {
2326 let b: u8 = bytes[ix];
2327 if lut[b as usize] {
2328 match callback(ix, b) {
2329 LoopInstruction::ContinueAndSkip(skip: usize) => {
2330 ix += skip;
2331 }
2332 LoopInstruction::BreakAtWith(ix: usize, val: Option) => {
2333 return (ix, val);
2334 }
2335 }
2336 }
2337 ix += 1;
2338 }
2339
2340 (ix, None)
2341}
2342
2343/// Split the usual heading content range and the content inside the trailing attribute block.
2344///
2345/// Returns `(leading_content_len, Option<trailing_attr_block_range>)`.
2346///
2347/// Note that `trailing_attr_block_range` will be empty range when the block
2348/// is `{}`, since the range is content inside the wrapping `{` and `}`.
2349///
2350/// The closing `}` of an attribute block can have trailing whitespaces.
2351/// They are automatically trimmed when the attribute block is being searched.
2352///
2353/// However, this method does not trim the trailing whitespaces of heading content.
2354/// It is callers' responsibility to trim them if necessary.
2355fn extract_attribute_block_content_from_header_text(
2356 heading: &[u8],
2357) -> (usize, Option<Range<usize>>) {
2358 let heading_len = heading.len();
2359 let mut ix = heading_len;
2360 ix -= scan_rev_while(heading, |b| {
2361 b == b'\n' || b == b'\r' || b == b' ' || b == b'\t'
2362 });
2363 if ix == 0 {
2364 return (heading_len, None);
2365 }
2366
2367 let attr_block_close = ix - 1;
2368 if heading.get(attr_block_close) != Some(&b'}') {
2369 // The last character is not `}`. No attribute blocks found.
2370 return (heading_len, None);
2371 }
2372 // move cursor before the closing right brace (`}`)
2373 ix -= 1;
2374
2375 ix -= scan_rev_while(&heading[..ix], |b| {
2376 // Characters to be excluded:
2377 // * `{` and `}`: special characters to open and close an attribute block.
2378 // * `\\`: a special character to escape many characters and disable some syntaxes.
2379 // + Handling of this escape character differs among markdown processors.
2380 // + Escaped characters will be separate text node from neighbors, so
2381 // it is not easy to handle unescaped string and trim the trailing block.
2382 // * `<` and `>`: special characters to start and end HTML tag.
2383 // + No known processors converts `{#<i>foo</i>}` into
2384 // `id="&lt;i&gt;foo&lt;/&gt;"` as of this writing, so hopefully
2385 // this restriction won't cause compatibility issues.
2386 // * `\n` and `\r`: a newline character.
2387 // + Setext heading can have multiple lines. However it is hard to support
2388 // attribute blocks that have newline inside, since the parsing proceeds line by
2389 // line and lines will be separate nodes even they are logically a single text.
2390 !matches!(b, b'{' | b'}' | b'<' | b'>' | b'\\' | b'\n' | b'\r')
2391 });
2392 if ix == 0 {
2393 // `{` is not found. No attribute blocks available.
2394 return (heading_len, None);
2395 }
2396 let attr_block_open = ix - 1;
2397 if heading[attr_block_open] != b'{' {
2398 // `{` is not found. No attribute blocks available.
2399 return (heading_len, None);
2400 }
2401
2402 (attr_block_open, Some(ix..attr_block_close))
2403}
2404
2405/// Parses an attribute block content, such as `.class1 #id .class2`.
2406///
2407/// Returns `(id, classes)`.
2408///
2409/// It is callers' responsibility to find opening and closing characters of the attribute
2410/// block. Usually [`extract_attribute_block_content_from_header_text`] function does it for you.
2411///
2412/// Note that this parsing requires explicit whitespace separators between
2413/// attributes. This is intentional design with the reasons below:
2414///
2415/// * to keep conversion simple and easy to understand for any possible input,
2416/// * to avoid adding less obvious conversion rule that can reduce compatibility
2417/// with other implementations more, and
2418/// * to follow the major design of implementations with the support for the
2419/// attribute blocks extension (as of this writing).
2420///
2421/// See also: [`Options::ENABLE_HEADING_ATTRIBUTES`].
2422///
2423/// [`Options::ENABLE_HEADING_ATTRIBUTES`]: `crate::Options::ENABLE_HEADING_ATTRIBUTES`
2424fn parse_inside_attribute_block(inside_attr_block: &str) -> Option<HeadingAttributes<'_>> {
2425 let mut id = None;
2426 let mut classes = Vec::new();
2427 let mut attrs = Vec::new();
2428
2429 for attr in inside_attr_block.split_ascii_whitespace() {
2430 // iterator returned by `str::split_ascii_whitespace` never emits empty
2431 // strings, so taking first byte won't panic.
2432 if attr.len() > 1 {
2433 let first_byte = attr.as_bytes()[0];
2434 if first_byte == b'#' {
2435 id = Some(attr[1..].into());
2436 } else if first_byte == b'.' {
2437 classes.push(attr[1..].into());
2438 } else {
2439 let split = attr.split_once('=');
2440 if let Some((key, value)) = split {
2441 attrs.push((key.into(), Some(value.into())));
2442 } else {
2443 attrs.push((attr.into(), None));
2444 }
2445 }
2446 }
2447 }
2448
2449 Some(HeadingAttributes { id, classes, attrs })
2450}
2451
2452#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2453mod simd {
2454 //! SIMD byte scanning logic.
2455 //!
2456 //! This module provides functions that allow walking through byteslices, calling
2457 //! provided callback functions on special bytes and their indices using SIMD.
2458 //! The byteset is defined in `compute_lookup`.
2459 //!
2460 //! The idea is to load in a chunk of 16 bytes and perform a lookup into a set of
2461 //! bytes on all the bytes in this chunk simultaneously. We produce a 16 bit bitmask
2462 //! from this and call the callback on every index corresponding to a 1 in this mask
2463 //! before moving on to the next chunk. This allows us to move quickly when there
2464 //! are no or few matches.
2465 //!
2466 //! The table lookup is inspired by this [great overview]. However, since all of the
2467 //! bytes we're interested in are ASCII, we don't quite need the full generality of
2468 //! the universal algorithm and are hence able to skip a few instructions.
2469 //!
2470 //! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html
2471
2472 use super::{LookupTable, LoopInstruction};
2473 use crate::Options;
2474 use core::arch::x86_64::*;
2475
2476 const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
2477
2478 /// Generates a lookup table containing the bitmaps for our
2479 /// special marker bytes. This is effectively a 128 element 2d bitvector,
2480 /// that can be indexed by a four bit row index (the lower nibble)
2481 /// and a three bit column index (upper nibble).
2482 pub(super) fn compute_lookup(options: &Options) -> [u8; 16] {
2483 let mut lookup = [0u8; 16];
2484 let standard_bytes = [
2485 b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
2486 ];
2487
2488 for &byte in &standard_bytes {
2489 add_lookup_byte(&mut lookup, byte);
2490 }
2491 if options.contains(Options::ENABLE_TABLES) {
2492 add_lookup_byte(&mut lookup, b'|');
2493 }
2494 if options.contains(Options::ENABLE_STRIKETHROUGH) {
2495 add_lookup_byte(&mut lookup, b'~');
2496 }
2497 if options.contains(Options::ENABLE_MATH) {
2498 add_lookup_byte(&mut lookup, b'$');
2499 add_lookup_byte(&mut lookup, b'{');
2500 add_lookup_byte(&mut lookup, b'}');
2501 }
2502 if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2503 for &byte in &[b'.', b'-', b'"', b'\''] {
2504 add_lookup_byte(&mut lookup, byte);
2505 }
2506 }
2507
2508 lookup
2509 }
2510
2511 fn add_lookup_byte(lookup: &mut [u8; 16], byte: u8) {
2512 lookup[(byte & 0x0f) as usize] |= 1 << (byte >> 4);
2513 }
2514
2515 /// Computes a bit mask for the given byteslice starting from the given index,
2516 /// where the 16 least significant bits indicate (by value of 1) whether or not
2517 /// there is a special character at that byte position. The least significant bit
2518 /// corresponds to `bytes[ix]` and the most significant bit corresponds to
2519 /// `bytes[ix + 15]`.
2520 /// It is only safe to call this function when `bytes.len() >= ix + VECTOR_SIZE`.
2521 #[target_feature(enable = "ssse3")]
2522 #[inline]
2523 unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
2524 debug_assert!(bytes.len() >= ix + VECTOR_SIZE);
2525
2526 let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
2527 // Small lookup table to compute single bit bitshifts
2528 // for 16 bytes at once.
2529 let bitmask_lookup =
2530 _mm_setr_epi8(1, 2, 4, 8, 16, 32, 64, -128, -1, -1, -1, -1, -1, -1, -1, -1);
2531
2532 // Load input from memory.
2533 let raw_ptr = bytes.as_ptr().add(ix) as *const __m128i;
2534 let input = _mm_loadu_si128(raw_ptr);
2535 // Compute the bitmap using the bottom nibble as an index
2536 // into the lookup table. Note that non-ascii bytes will have
2537 // their most significant bit set and will map to lookup[0].
2538 let bitset = _mm_shuffle_epi8(bitmap, input);
2539 // Compute the high nibbles of the input using a 16-bit rightshift of four
2540 // and a mask to prevent most-significant bit issues.
2541 let higher_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0f));
2542 // Create a bitmask for the bitmap by perform a left shift of the value
2543 // of the higher nibble. Bytes with their most significant set are mapped
2544 // to -1 (all ones).
2545 let bitmask = _mm_shuffle_epi8(bitmask_lookup, higher_nibbles);
2546 // Test the bit of the bitmap by AND'ing the bitmap and the mask together.
2547 let tmp = _mm_and_si128(bitset, bitmask);
2548 // Check whether the result was not null. NEQ is not a SIMD intrinsic,
2549 // but comparing to the bitmask is logically equivalent. This also prevents us
2550 // from matching any non-ASCII bytes since none of the bitmaps were all ones
2551 // (-1).
2552 let result = _mm_cmpeq_epi8(tmp, bitmask);
2553
2554 // Return the resulting bitmask.
2555 _mm_movemask_epi8(result)
2556 }
2557
2558 /// Calls callback on byte indices and their value.
2559 /// Breaks when callback returns LoopInstruction::BreakAtWith(ix, val). And skips the
2560 /// number of bytes in callback return value otherwise.
2561 /// Returns the final index and a possible break value.
2562 pub(super) fn iterate_special_bytes<F, T>(
2563 lut: &LookupTable,
2564 bytes: &[u8],
2565 ix: usize,
2566 callback: F,
2567 ) -> (usize, Option<T>)
2568 where
2569 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2570 {
2571 if is_x86_feature_detected!("ssse3") && bytes.len() >= VECTOR_SIZE {
2572 unsafe { simd_iterate_special_bytes(&lut.simd, bytes, ix, callback) }
2573 } else {
2574 super::scalar_iterate_special_bytes(&lut.scalar, bytes, ix, callback)
2575 }
2576 }
2577
2578 /// Calls the callback function for every 1 in the given bitmask with
2579 /// the index `offset + ix`, where `ix` is the position of the 1 in the mask.
2580 /// Returns `Ok(ix)` to continue from index `ix`, `Err((end_ix, opt_val)` to break with
2581 /// final index `end_ix` and optional value `opt_val`.
2582 unsafe fn process_mask<F, T>(
2583 mut mask: i32,
2584 bytes: &[u8],
2585 mut offset: usize,
2586 callback: &mut F,
2587 ) -> Result<usize, (usize, Option<T>)>
2588 where
2589 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2590 {
2591 while mask != 0 {
2592 let mask_ix = mask.trailing_zeros() as usize;
2593 offset += mask_ix;
2594 match callback(offset, *bytes.get_unchecked(offset)) {
2595 LoopInstruction::ContinueAndSkip(skip) => {
2596 offset += skip + 1;
2597 mask = mask.wrapping_shr((skip + 1 + mask_ix) as u32);
2598 }
2599 LoopInstruction::BreakAtWith(ix, val) => return Err((ix, val)),
2600 }
2601 }
2602 Ok(offset)
2603 }
2604
2605 #[target_feature(enable = "ssse3")]
2606 /// Important: only call this function when `bytes.len() >= 16`. Doing
2607 /// so otherwise may exhibit undefined behaviour.
2608 unsafe fn simd_iterate_special_bytes<F, T>(
2609 lut: &[u8; 16],
2610 bytes: &[u8],
2611 mut ix: usize,
2612 mut callback: F,
2613 ) -> (usize, Option<T>)
2614 where
2615 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2616 {
2617 debug_assert!(bytes.len() >= VECTOR_SIZE);
2618 let upperbound = bytes.len() - VECTOR_SIZE;
2619
2620 while ix < upperbound {
2621 let mask = compute_mask(lut, bytes, ix);
2622 let block_start = ix;
2623 ix = match process_mask(mask, bytes, ix, &mut callback) {
2624 Ok(ix) => std::cmp::max(ix, VECTOR_SIZE + block_start),
2625 Err((end_ix, val)) => return (end_ix, val),
2626 };
2627 }
2628
2629 if bytes.len() > ix {
2630 // shift off the bytes at start we have already scanned
2631 let mask = compute_mask(lut, bytes, upperbound) >> ix - upperbound;
2632 if let Err((end_ix, val)) = process_mask(mask, bytes, ix, &mut callback) {
2633 return (end_ix, val);
2634 }
2635 }
2636
2637 (bytes.len(), None)
2638 }
2639
2640 #[cfg(test)]
2641 mod simd_test {
2642 use super::super::create_lut;
2643 use super::{iterate_special_bytes, LoopInstruction};
2644 use crate::Options;
2645
2646 fn check_expected_indices(bytes: &[u8], expected: &[usize], skip: usize) {
2647 let mut opts = Options::empty();
2648 opts.insert(Options::ENABLE_MATH);
2649 opts.insert(Options::ENABLE_TABLES);
2650 opts.insert(Options::ENABLE_FOOTNOTES);
2651 opts.insert(Options::ENABLE_STRIKETHROUGH);
2652 opts.insert(Options::ENABLE_TASKLISTS);
2653
2654 let lut = create_lut(&opts);
2655 let mut indices = vec![];
2656
2657 iterate_special_bytes::<_, i32>(&lut, bytes, 0, |ix, _byte_ty| {
2658 indices.push(ix);
2659 LoopInstruction::ContinueAndSkip(skip)
2660 });
2661
2662 assert_eq!(&indices[..], expected);
2663 }
2664
2665 #[test]
2666 fn simple_no_match() {
2667 check_expected_indices("abcdef0123456789".as_bytes(), &[], 0);
2668 }
2669
2670 #[test]
2671 fn simple_match() {
2672 check_expected_indices("*bcd&f0123456789".as_bytes(), &[0, 4], 0);
2673 }
2674
2675 #[test]
2676 fn single_open_fish() {
2677 check_expected_indices("<".as_bytes(), &[0], 0);
2678 }
2679
2680 #[test]
2681 fn long_match() {
2682 check_expected_indices("0123456789abcde~*bcd&f0".as_bytes(), &[15, 16, 20], 0);
2683 }
2684
2685 #[test]
2686 fn border_skip() {
2687 check_expected_indices("0123456789abcde~~~~d&f0".as_bytes(), &[15, 20], 3);
2688 }
2689
2690 #[test]
2691 fn exhaustive_search() {
2692 let chars = [
2693 b'\n', b'\r', b'*', b'_', b'~', b'|', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
2694 b'$', b'{', b'}',
2695 ];
2696
2697 for &c in &chars {
2698 for i in 0u8..=255 {
2699 if !chars.contains(&i) {
2700 // full match
2701 let mut buf = [i; 18];
2702 buf[3] = c;
2703 buf[6] = c;
2704
2705 check_expected_indices(&buf[..], &[3, 6], 0);
2706 }
2707 }
2708 }
2709 }
2710 }
2711}
2712