| 1 | //! Miscellaneous utilities to increase comfort. |
| 2 | //! Special thanks to: |
| 3 | //! |
| 4 | //! - <https://github.com/BenjaminRi/Redwood-Wiki/blob/master/src/markdown_utils.rs>. |
| 5 | //! Its author authorized the use of this GPL code in this project in |
| 6 | //! <https://github.com/raphlinus/pulldown-cmark/issues/507>. |
| 7 | //! |
| 8 | //! - <https://gist.github.com/rambip/a507c312ed61c99c24b2a54f98325721>. |
| 9 | //! Its author proposed the solution in |
| 10 | //! <https://github.com/raphlinus/pulldown-cmark/issues/708>. |
| 11 | |
| 12 | use crate::{ |
| 13 | BrokenLinkCallback, CowStr, DefaultBrokenLinkCallback, Event, OffsetIter, Options, Parser, |
| 14 | }; |
| 15 | use std::{iter::Peekable, ops::Range}; |
| 16 | |
| 17 | /// Merge consecutive `Event::Text` events into only one. |
| 18 | #[derive (Debug)] |
| 19 | pub struct TextMergeStream<'a, I> { |
| 20 | iter: I, |
| 21 | last_event: Option<Event<'a>>, |
| 22 | } |
| 23 | |
| 24 | impl<'a, I> TextMergeStream<'a, I> |
| 25 | where |
| 26 | I: Iterator<Item = Event<'a>>, |
| 27 | { |
| 28 | pub fn new(iter: I) -> Self { |
| 29 | Self { |
| 30 | iter, |
| 31 | last_event: None, |
| 32 | } |
| 33 | } |
| 34 | } |
| 35 | |
| 36 | impl<'a, I> Iterator for TextMergeStream<'a, I> |
| 37 | where |
| 38 | I: Iterator<Item = Event<'a>>, |
| 39 | { |
| 40 | type Item = Event<'a>; |
| 41 | |
| 42 | fn next(&mut self) -> Option<Self::Item> { |
| 43 | match (self.last_event.take(), self.iter.next()) { |
| 44 | (Some(Event::Text(last_text)), Some(Event::Text(next_text))) => { |
| 45 | // We need to start merging consecutive text events together into one |
| 46 | let mut string_buf: String = last_text.into_string(); |
| 47 | string_buf.push_str(&next_text); |
| 48 | loop { |
| 49 | // Avoid recursion to avoid stack overflow and to optimize concatenation |
| 50 | match self.iter.next() { |
| 51 | Some(Event::Text(next_text)) => { |
| 52 | string_buf.push_str(&next_text); |
| 53 | } |
| 54 | next_event => { |
| 55 | self.last_event = next_event; |
| 56 | if string_buf.is_empty() { |
| 57 | // Discard text event(s) altogether if there is no text |
| 58 | break self.next(); |
| 59 | } else { |
| 60 | break Some(Event::Text(CowStr::Boxed( |
| 61 | string_buf.into_boxed_str(), |
| 62 | ))); |
| 63 | } |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | (None, Some(next_event)) => { |
| 69 | // This only happens once during the first iteration and if there are items |
| 70 | self.last_event = Some(next_event); |
| 71 | self.next() |
| 72 | } |
| 73 | (None, None) => { |
| 74 | // This happens when the iterator is depleted |
| 75 | None |
| 76 | } |
| 77 | (last_event, next_event) => { |
| 78 | // The ordinary case, emit one event after the other without modification |
| 79 | self.last_event = next_event; |
| 80 | last_event |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | /// Merge consecutive `Event::Text` events into only one with offsets. |
| 87 | #[derive (Debug)] |
| 88 | pub struct TextMergeWithOffset<'input, F = DefaultBrokenLinkCallback> |
| 89 | where |
| 90 | F: BrokenLinkCallback<'input>, |
| 91 | { |
| 92 | source: &'input str, |
| 93 | parser: Peekable<OffsetIter<'input, F>>, |
| 94 | } |
| 95 | |
| 96 | impl<'input, F> TextMergeWithOffset<'input, F> |
| 97 | where |
| 98 | F: BrokenLinkCallback<'input>, |
| 99 | { |
| 100 | pub fn new_ext(source: &'input str, options: Options) -> Self { |
| 101 | Self { |
| 102 | source, |
| 103 | parser: ParserOffsetIter<'_, F>::new_with_broken_link_callback(text:source, options, broken_link_callback:None) |
| 104 | .into_offset_iter() |
| 105 | .peekable(), |
| 106 | } |
| 107 | } |
| 108 | pub fn new_ext_with_broken_link_callback( |
| 109 | source: &'input str, |
| 110 | options: Options, |
| 111 | callback: Option<F>, |
| 112 | ) -> Self { |
| 113 | Self { |
| 114 | source, |
| 115 | parser: ParserOffsetIter<'_, F>::new_with_broken_link_callback(text:source, options, callback) |
| 116 | .into_offset_iter() |
| 117 | .peekable(), |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | impl<'input, F> Iterator for TextMergeWithOffset<'input, F> |
| 123 | where |
| 124 | F: BrokenLinkCallback<'input>, |
| 125 | { |
| 126 | type Item = (Event<'input>, Range<usize>); |
| 127 | fn next(&mut self) -> Option<Self::Item> { |
| 128 | let is_empty_text = |x: Option<&(Event<'input>, Range<usize>)>| match x { |
| 129 | Some(e) => matches!(&e.0, Event::Text(t) if t.is_empty()), |
| 130 | None => false, |
| 131 | }; |
| 132 | |
| 133 | while is_empty_text(self.parser.peek()) { |
| 134 | self.parser.next(); |
| 135 | } |
| 136 | |
| 137 | match self.parser.peek()? { |
| 138 | (Event::Text(_), range) => { |
| 139 | let start = range.start; |
| 140 | let mut end = range.end; |
| 141 | while let Some((Event::Text(_), _)) = self.parser.peek() { |
| 142 | end = self.parser.next().unwrap().1.end; |
| 143 | } |
| 144 | |
| 145 | Some((Event::Text(self.source[start..end].into()), start..end)) |
| 146 | } |
| 147 | _ => self.parser.next(), |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | |