//! Miscellaneous utilities to increase comfort.
//! Special thanks to:
//!
//! - <https://github.com/BenjaminRi/Redwood-Wiki/blob/master/src/markdown_utils.rs>.
//!   Its author authorized the use of this GPL code in this project in
//!   <https://github.com/raphlinus/pulldown-cmark/issues/507>.
//!
//! - <https://gist.github.com/rambip/a507c312ed61c99c24b2a54f98325721>.
//!   Its author proposed the solution in
//!   <https://github.com/raphlinus/pulldown-cmark/issues/708>.
11
12use crate::{
13 BrokenLinkCallback, CowStr, DefaultBrokenLinkCallback, Event, OffsetIter, Options, Parser,
14};
15use std::{iter::Peekable, ops::Range};
16
17/// Merge consecutive `Event::Text` events into only one.
18#[derive(Debug)]
19pub struct TextMergeStream<'a, I> {
20 iter: I,
21 last_event: Option<Event<'a>>,
22}
23
24impl<'a, I> TextMergeStream<'a, I>
25where
26 I: Iterator<Item = Event<'a>>,
27{
28 pub fn new(iter: I) -> Self {
29 Self {
30 iter,
31 last_event: None,
32 }
33 }
34}
35
36impl<'a, I> Iterator for TextMergeStream<'a, I>
37where
38 I: Iterator<Item = Event<'a>>,
39{
40 type Item = Event<'a>;
41
42 fn next(&mut self) -> Option<Self::Item> {
43 match (self.last_event.take(), self.iter.next()) {
44 (Some(Event::Text(last_text)), Some(Event::Text(next_text))) => {
45 // We need to start merging consecutive text events together into one
46 let mut string_buf: String = last_text.into_string();
47 string_buf.push_str(&next_text);
48 loop {
49 // Avoid recursion to avoid stack overflow and to optimize concatenation
50 match self.iter.next() {
51 Some(Event::Text(next_text)) => {
52 string_buf.push_str(&next_text);
53 }
54 next_event => {
55 self.last_event = next_event;
56 if string_buf.is_empty() {
57 // Discard text event(s) altogether if there is no text
58 break self.next();
59 } else {
60 break Some(Event::Text(CowStr::Boxed(
61 string_buf.into_boxed_str(),
62 )));
63 }
64 }
65 }
66 }
67 }
68 (None, Some(next_event)) => {
69 // This only happens once during the first iteration and if there are items
70 self.last_event = Some(next_event);
71 self.next()
72 }
73 (None, None) => {
74 // This happens when the iterator is depleted
75 None
76 }
77 (last_event, next_event) => {
78 // The ordinary case, emit one event after the other without modification
79 self.last_event = next_event;
80 last_event
81 }
82 }
83 }
84}
85
86/// Merge consecutive `Event::Text` events into only one with offsets.
87#[derive(Debug)]
88pub struct TextMergeWithOffset<'input, F = DefaultBrokenLinkCallback>
89where
90 F: BrokenLinkCallback<'input>,
91{
92 source: &'input str,
93 parser: Peekable<OffsetIter<'input, F>>,
94}
95
96impl<'input, F> TextMergeWithOffset<'input, F>
97where
98 F: BrokenLinkCallback<'input>,
99{
100 pub fn new_ext(source: &'input str, options: Options) -> Self {
101 Self {
102 source,
103 parser: ParserOffsetIter<'_, F>::new_with_broken_link_callback(text:source, options, broken_link_callback:None)
104 .into_offset_iter()
105 .peekable(),
106 }
107 }
108 pub fn new_ext_with_broken_link_callback(
109 source: &'input str,
110 options: Options,
111 callback: Option<F>,
112 ) -> Self {
113 Self {
114 source,
115 parser: ParserOffsetIter<'_, F>::new_with_broken_link_callback(text:source, options, callback)
116 .into_offset_iter()
117 .peekable(),
118 }
119 }
120}
121
122impl<'input, F> Iterator for TextMergeWithOffset<'input, F>
123where
124 F: BrokenLinkCallback<'input>,
125{
126 type Item = (Event<'input>, Range<usize>);
127 fn next(&mut self) -> Option<Self::Item> {
128 let is_empty_text = |x: Option<&(Event<'input>, Range<usize>)>| match x {
129 Some(e) => matches!(&e.0, Event::Text(t) if t.is_empty()),
130 None => false,
131 };
132
133 while is_empty_text(self.parser.peek()) {
134 self.parser.next();
135 }
136
137 match self.parser.peek()? {
138 (Event::Text(_), range) => {
139 let start = range.start;
140 let mut end = range.end;
141 while let Some((Event::Text(_), _)) = self.parser.peek() {
142 end = self.parser.next().unwrap().1.end;
143 }
144
145 Some((Event::Text(self.source[start..end].into()), start..end))
146 }
147 _ => self.parser.next(),
148 }
149 }
150}
151