1use std::{
2 borrow::Cow,
3 iter::{FusedIterator, Peekable},
4 str::CharIndices,
5};
6
/// States of the hand-written automaton that recognises one ANSI escape code.
///
/// The machine starts in [`State::Start`], is fed one character at a time and ends up in
/// [`State::Trap`] as soon as the input can no longer be (part of) an escape code.
#[derive(Clone, Copy, Debug)]
enum State {
    /// Nothing has been consumed yet; only an introducer (`ESC` or `U+009B`) is accepted.
    Start,
    /// The introducer has been consumed.
    S1,
    /// The introducer was immediately followed by `(` or `)`.
    S2,
    /// Accepting: `(` or `)` directly followed by `0`, `1` or `2`. Nothing may follow.
    S3,
    /// Inside the run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`).
    S4,
    /// Accepting: first digit of a digit run.
    S5,
    /// Accepting: second consecutive digit.
    S6,
    /// Accepting: third consecutive digit.
    S7,
    /// Accepting: fourth consecutive digit.
    S8,
    /// Accepting: fifth consecutive digit. Nothing may follow.
    S9,
    /// A `;` that follows a digit run (or another such `;`).
    S10,
    /// Accepting: a terminating character has been consumed. Nothing may follow.
    S11,
    /// Dead state; every further character keeps the machine here.
    Trap,
}
23
24impl Default for State {
25 fn default() -> Self {
26 Self::Start
27 }
28}
29
30impl State {
31 fn is_final(&self) -> bool {
32 #[allow(clippy::match_like_matches_macro)]
33 match self {
34 Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
35 _ => false,
36 }
37 }
38
39 fn is_trapped(&self) -> bool {
40 #[allow(clippy::match_like_matches_macro)]
41 match self {
42 Self::Trap => true,
43 _ => false,
44 }
45 }
46
47 fn transition(&mut self, c: char) {
48 *self = match c {
49 '\u{1b}' | '\u{9b}' => match self {
50 Self::Start => Self::S1,
51 _ => Self::Trap,
52 },
53 '(' | ')' => match self {
54 Self::S1 => Self::S2,
55 Self::S2 | Self::S4 => Self::S4,
56 _ => Self::Trap,
57 },
58 ';' => match self {
59 Self::S1 | Self::S2 | Self::S4 => Self::S4,
60 Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
61 _ => Self::Trap,
62 },
63
64 '[' | '#' | '?' => match self {
65 Self::S1 | Self::S2 | Self::S4 => Self::S4,
66 _ => Self::Trap,
67 },
68 '0'..='2' => match self {
69 Self::S1 | Self::S4 => Self::S5,
70 Self::S2 => Self::S3,
71 Self::S5 => Self::S6,
72 Self::S6 => Self::S7,
73 Self::S7 => Self::S8,
74 Self::S8 => Self::S9,
75 Self::S10 => Self::S5,
76 _ => Self::Trap,
77 },
78 '3'..='9' => match self {
79 Self::S1 | Self::S4 => Self::S5,
80 Self::S2 => Self::S5,
81 Self::S5 => Self::S6,
82 Self::S6 => Self::S7,
83 Self::S7 => Self::S8,
84 Self::S8 => Self::S9,
85 Self::S10 => Self::S5,
86 _ => Self::Trap,
87 },
88 'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
89 match self {
90 Self::S1
91 | Self::S2
92 | Self::S4
93 | Self::S5
94 | Self::S6
95 | Self::S7
96 | Self::S8
97 | Self::S10 => Self::S11,
98 _ => Self::Trap,
99 }
100 }
101 _ => Self::Trap,
102 };
103 }
104}
105
/// Scanner that walks a string and reports the ANSI escape codes found in it.
#[derive(Debug)]
struct Matches<'a> {
    /// The string being scanned; every reported match refers back to it.
    s: &'a str,
    /// Cursor over the characters of `s`, peekable so that a character which ends one
    /// candidate can still be examined as the start of the next.
    it: Peekable<CharIndices<'a>>,
}
111
112impl<'a> Matches<'a> {
113 fn new(s: &'a str) -> Self {
114 let it: impl Iterator = s.char_indices().peekable();
115 Self { s, it }
116 }
117}
118
/// One escape code located inside a string.
#[derive(Debug)]
struct Match<'a> {
    /// The complete string that was scanned.
    text: &'a str,
    /// Byte offset in `text` at which the code begins.
    start: usize,
    /// Byte offset in `text` just past the end of the code (exclusive).
    end: usize,
}
125
126impl<'a> Match<'a> {
127 #[inline]
128 pub fn as_str(&self) -> &'a str {
129 &self.text[self.start..self.end]
130 }
131}
132
133impl<'a> Iterator for Matches<'a> {
134 type Item = Match<'a>;
135
136 fn next(&mut self) -> Option<Self::Item> {
137 find_ansi_code_exclusive(&mut self.it).map(|(start: usize, end: usize)| Match {
138 text: self.s,
139 start,
140 end,
141 })
142 }
143}
144
145impl<'a> FusedIterator for Matches<'a> {}
146
147fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
148 'outer: loop {
149 if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
150 let start = *start;
151 let mut state = State::default();
152 let mut maybe_end = None;
153
154 loop {
155 let item = it.peek();
156
157 if let Some((idx, c)) = item {
158 state.transition(*c);
159
160 if state.is_final() {
161 maybe_end = Some(*idx);
162 }
163 }
164
165 // The match is greedy so run till we hit the trap state no matter what. A valid
166 // match is just one that was final at some point
167 if state.is_trapped() || item.is_none() {
168 match maybe_end {
169 Some(end) => {
170 // All possible final characters are a single byte so it's safe to make
171 // the end exclusive by just adding one
172 return Some((start, end + 1));
173 }
174 // The character we are peeking right now might be the start of a match so
175 // we want to continue the loop without popping off that char
176 None => continue 'outer,
177 }
178 }
179
180 it.next();
181 }
182 }
183
184 it.next();
185 }
186}
187
188/// Helper function to strip ansi codes.
189pub fn strip_ansi_codes(s: &str) -> Cow<str> {
190 let mut char_it: impl Iterator = s.char_indices().peekable();
191 match find_ansi_code_exclusive(&mut char_it) {
192 Some(_) => {
193 let stripped: String = AnsiCodeIteratorimpl Iterator::new(s)
194 .filter_map(|(text: &str, is_ansi: bool)| if is_ansi { None } else { Some(text) })
195 .collect();
196 Cow::Owned(stripped)
197 }
198 None => Cow::Borrowed(s),
199 }
200}
201
202/// An iterator over ansi codes in a string.
203///
204/// This type can be used to scan over ansi codes in a string.
205/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
206/// the original string and `is_ansi` indicates if the slice contains
207/// ansi codes or string values.
208pub struct AnsiCodeIterator<'a> {
209 s: &'a str,
210 pending_item: Option<(&'a str, bool)>,
211 last_idx: usize,
212 cur_idx: usize,
213 iter: Matches<'a>,
214}
215
216impl<'a> AnsiCodeIterator<'a> {
217 /// Creates a new ansi code iterator.
218 pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
219 AnsiCodeIterator {
220 s,
221 pending_item: None,
222 last_idx: 0,
223 cur_idx: 0,
224 iter: Matches::new(s),
225 }
226 }
227
228 /// Returns the string slice up to the current match.
229 pub fn current_slice(&self) -> &str {
230 &self.s[..self.cur_idx]
231 }
232
233 /// Returns the string slice from the current match to the end.
234 pub fn rest_slice(&self) -> &str {
235 &self.s[self.cur_idx..]
236 }
237}
238
239impl<'a> Iterator for AnsiCodeIterator<'a> {
240 type Item = (&'a str, bool);
241
242 fn next(&mut self) -> Option<(&'a str, bool)> {
243 if let Some(pending_item) = self.pending_item.take() {
244 self.cur_idx += pending_item.0.len();
245 Some(pending_item)
246 } else if let Some(m) = self.iter.next() {
247 let s = &self.s[self.last_idx..m.start];
248 self.last_idx = m.end;
249 if s.is_empty() {
250 self.cur_idx = m.end;
251 Some((m.as_str(), true))
252 } else {
253 self.cur_idx = m.start;
254 self.pending_item = Some((m.as_str(), true));
255 Some((s, false))
256 }
257 } else if self.last_idx < self.s.len() {
258 let rv = &self.s[self.last_idx..];
259 self.cur_idx = self.s.len();
260 self.last_idx = self.s.len();
261 Some((rv, false))
262 } else {
263 None
264 }
265 }
266}
267
268impl<'a> FusedIterator for AnsiCodeIterator<'a> {}
269
#[cfg(test)]
mod tests {
    use super::*;

    use lazy_static::lazy_static;
    use proptest::prelude::*;
    use regex::Regex;

    // `State` is a hand-written DFA translated from the regex that used to do this job. The
    // regex lives on here as the reference the DFA is checked against.
    lazy_static! {
        static ref STRIP_ANSI_RE: Regex = Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap();
    }

    // Lets `assert_eq!` compare regex matches with DFA matches directly: two matches are
    // equal when they cover the same byte range.
    impl<'a, 'b> PartialEq<Match<'a>> for regex::Match<'b> {
        fn eq(&self, other: &Match<'a>) -> bool {
            (self.start(), self.end()) == (other.start, other.end)
        }
    }

    /// Asserts that the DFA finds exactly the matches the reference regex finds in `s`.
    fn assert_same_as_regex(s: &str) {
        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
        let new_matches: Vec<_> = Matches::new(s).collect();
        assert_eq!(old_matches, new_matches);
    }

    /// Collects the text of every match the DFA finds in `s`.
    fn matched_text(s: &str) -> Vec<&str> {
        Matches::new(s).map(|m| m.as_str()).collect()
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            assert_same_as_regex(&s);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // Trying every byte would take far too long. This list keeps a representative of each
        // group of characters the regex treats alike, plus one character the regex never
        // mentions (' ').
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        // Depth-first enumeration: extends `prefix` by `remaining` more bytes in every
        // possible way and checks each completed sequence.
        fn extend_and_check(remaining: usize, prefix: &mut Vec<u8>) {
            if remaining > 0 {
                for &byte in POSSIBLE_BYTES {
                    prefix.push(byte);
                    extend_and_check(remaining - 1, prefix);
                    prefix.pop();
                }
                return;
            }

            // Byte sequences that are not valid UTF-8 cannot be turned into a `&str` and are
            // skipped.
            if let Ok(s) = std::str::from_utf8(prefix) {
                assert_same_as_regex(s);
            }
        }

        for str_len in 0..=6 {
            extend_and_check(str_len, &mut Vec::with_capacity(str_len));
        }
    }

    #[test]
    fn complex_data() {
        let log_path = std::path::Path::new("tests")
            .join("data")
            .join("sample_zellij_session.log");
        let s = std::fs::read_to_string(log_path).unwrap();

        assert_same_as_regex(&s);
    }

    #[test]
    fn state_machine() {
        let mut state = State::default();
        assert!(!state.is_final());

        "\x1b)B".chars().for_each(|c| state.transition(c));
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let found = matched_text("\x1b\x1bf");
        assert_eq!(&["\x1bf"], found.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let found = matched_text(s);
        assert_eq!(&[s], found.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let found = matched_text(s);
        assert_eq!(&[s], found.as_slice());
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let mut iter = AnsiCodeIterator::new("\x1b(0lpq\x1b)Benglish");
        let expected = [
            ("\x1b(0", true),
            ("lpq", false),
            ("\x1b)B", true),
            ("english", false),
        ];
        for item in expected.iter() {
            assert_eq!(iter.next(), Some(*item));
        }
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);

        // Each row: the item `next` must yield, then what `current_slice` and `rest_slice`
        // must report right afterwards.
        let steps = [
            (("Hello ", false), "Hello ", "\x1b[31mWorld\x1b[0m!"),
            (("\x1b[31m", true), "Hello \x1b[31m", "World\x1b[0m!"),
            (("World", false), "Hello \x1b[31mWorld", "\x1b[0m!"),
            (("\x1b[0m", true), "Hello \x1b[31mWorld\x1b[0m", "!"),
            (("!", false), "Hello \x1b[31mWorld\x1b[0m!", ""),
        ];
        for (item, seen, rest) in steps.iter() {
            assert_eq!(iter.next(), Some(*item));
            assert_eq!(iter.current_slice(), *seen);
            assert_eq!(iter.rest_slice(), *rest);
        }
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);

        // Same layout as in `test_ansi_iter_re`: yielded item, current slice, rest slice.
        let steps = [
            (("\x1b[31m", true), "\x1b[31m", "\x1b[1ma\x1b[0m"),
            (("\x1b[1m", true), "\x1b[31m\x1b[1m", "a\x1b[0m"),
            (("a", false), "\x1b[31m\x1b[1ma", "\x1b[0m"),
            (("\x1b[0m", true), "\x1b[31m\x1b[1ma\x1b[0m", ""),
        ];
        for (item, seen, rest) in steps.iter() {
            assert_eq!(iter.next(), Some(*item));
            assert_eq!(iter.current_slice(), *seen);
            assert_eq!(iter.rest_slice(), *rest);
        }
        assert_eq!(iter.next(), None);
    }
}
439