| 1 | use std::{ |
| 2 | borrow::Cow, |
| 3 | iter::{FusedIterator, Peekable}, |
| 4 | str::CharIndices, |
| 5 | }; |
| 6 | |
/// States of the hand-written DFA that recognises one ANSI escape sequence.
///
/// The automaton is fed one `char` at a time through [`State::transition`].
/// A prefix of the input is a valid escape sequence whenever the automaton is
/// in a final state (see [`State::is_final`]) after consuming it.
#[derive(Debug, Clone, Copy)]
enum State {
    /// Nothing has been consumed yet.
    Start,
    /// The introducer (`U+001B` or `U+009B`) has been consumed.
    S1,
    /// A single `(` or `)` directly after the introducer.
    S2,
    /// `(` or `)` followed by one of `0`, `1`, `2`. Final, with no way onwards.
    S3,
    /// Inside the run of prefix characters `[`, `(`, `)`, `#`, `;`, `?`.
    S4,
    /// First digit of a numeric group. Final.
    S5,
    /// Second digit of a numeric group. Final.
    S6,
    /// Third digit of a numeric group. Final.
    S7,
    /// Fourth digit of a numeric group. Final.
    S8,
    /// A digit read right after four digits. Final, with no way onwards.
    S9,
    /// A `;` separator read after a numeric group (or after another such `;`).
    S10,
    /// A terminating letter or symbol has been consumed. Final, with no way onwards.
    S11,
    /// Dead state: the input cannot be extended into a (longer) match.
    Trap,
}

impl Default for State {
    /// The automaton begins in [`State::Start`].
    fn default() -> Self {
        Self::Start
    }
}

impl State {
    /// Returns `true` when the characters consumed so far form a complete escape sequence.
    fn is_final(&self) -> bool {
        // NOTE(review): presumably kept as a `match` (rather than `matches!`) to support
        // older toolchains — confirm the MSRV before simplifying.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
            _ => false,
        }
    }

    /// Returns `true` once the automaton is in the dead state and can never match again.
    fn is_trapped(&self) -> bool {
        // NOTE(review): see `is_final` for why this is not `matches!`.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::Trap => true,
            _ => false,
        }
    }

    /// Advances the automaton by one character.
    ///
    /// Any combination of state and character that is not listed below moves the
    /// automaton to [`State::Trap`], which is absorbing.
    fn transition(&mut self, c: char) {
        *self = match c {
            // The introducer is only valid as the very first character.
            '\u{1b}' | '\u{9b}' => match *self {
                Self::Start => Self::S1,
                _ => Self::Trap,
            },
            '(' | ')' => match *self {
                Self::S1 => Self::S2,
                Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // `;` is both a prefix character and the separator between numeric groups.
            ';' => match *self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
                _ => Self::Trap,
            },
            '[' | '#' | '?' => match *self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            '0'..='9' => match *self {
                // Directly after a lone paren only `0`, `1` and `2` lead to `S3`;
                // every other digit starts an ordinary numeric group.
                Self::S2 if c <= '2' => Self::S3,
                Self::S1 | Self::S2 | Self::S4 | Self::S10 => Self::S5,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                _ => Self::Trap,
            },
            // Terminating characters.
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                match *self {
                    Self::S1
                    | Self::S2
                    | Self::S4
                    | Self::S5
                    | Self::S6
                    | Self::S7
                    | Self::S8
                    | Self::S10 => Self::S11,
                    _ => Self::Trap,
                }
            }
            _ => Self::Trap,
        };
    }
}
| 105 | |
/// Scanner that walks a string looking for ANSI escape sequences.
#[derive(Debug)]
struct Matches<'a> {
    /// The string being scanned.
    s: &'a str,
    /// Peekable cursor over the characters of `s` together with their byte offsets.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the start of `s`.
    fn new(s: &'a str) -> Self {
        Self {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
| 118 | |
/// One escape sequence located inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The complete string that was searched.
    text: &'a str,
    /// Byte offset in `text` where the sequence begins.
    start: usize,
    /// Byte offset in `text` one past the end of the sequence.
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched part of the searched string.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        let span = self.start..self.end;
        &self.text[span]
    }
}
| 132 | |
| 133 | impl<'a> Iterator for Matches<'a> { |
| 134 | type Item = Match<'a>; |
| 135 | |
| 136 | fn next(&mut self) -> Option<Self::Item> { |
| 137 | find_ansi_code_exclusive(&mut self.it).map(|(start: usize, end: usize)| Match { |
| 138 | text: self.s, |
| 139 | start, |
| 140 | end, |
| 141 | }) |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | impl FusedIterator for Matches<'_> {} |
| 146 | |
| 147 | fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> { |
| 148 | 'outer: loop { |
| 149 | if let (start, ' \u{1b}' ) | (start, ' \u{9b}' ) = it.peek()? { |
| 150 | let start = *start; |
| 151 | let mut state = State::default(); |
| 152 | let mut maybe_end = None; |
| 153 | |
| 154 | loop { |
| 155 | let item = it.peek(); |
| 156 | |
| 157 | if let Some((idx, c)) = item { |
| 158 | state.transition(*c); |
| 159 | |
| 160 | if state.is_final() { |
| 161 | maybe_end = Some(*idx); |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | // The match is greedy so run till we hit the trap state no matter what. A valid |
| 166 | // match is just one that was final at some point |
| 167 | if state.is_trapped() || item.is_none() { |
| 168 | match maybe_end { |
| 169 | Some(end) => { |
| 170 | // All possible final characters are a single byte so it's safe to make |
| 171 | // the end exclusive by just adding one |
| 172 | return Some((start, end + 1)); |
| 173 | } |
| 174 | // The character we are peeking right now might be the start of a match so |
| 175 | // we want to continue the loop without popping off that char |
| 176 | None => continue 'outer, |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | it.next(); |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | it.next(); |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | /// Helper function to strip ansi codes. |
| 189 | pub fn strip_ansi_codes(s: &str) -> Cow<str> { |
| 190 | let mut char_it: impl Iterator = s.char_indices().peekable(); |
| 191 | match find_ansi_code_exclusive(&mut char_it) { |
| 192 | Some(_) => { |
| 193 | let stripped: String = AnsiCodeIteratorimpl Iterator ::new(s) |
| 194 | .filter_map(|(text: &str, is_ansi: bool)| if is_ansi { None } else { Some(text) }) |
| 195 | .collect(); |
| 196 | Cow::Owned(stripped) |
| 197 | } |
| 198 | None => Cow::Borrowed(s), |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | /// An iterator over ansi codes in a string. |
| 203 | /// |
| 204 | /// This type can be used to scan over ansi codes in a string. |
| 205 | /// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of |
| 206 | /// the original string and `is_ansi` indicates if the slice contains |
| 207 | /// ansi codes or string values. |
| 208 | pub struct AnsiCodeIterator<'a> { |
| 209 | s: &'a str, |
| 210 | pending_item: Option<(&'a str, bool)>, |
| 211 | last_idx: usize, |
| 212 | cur_idx: usize, |
| 213 | iter: Matches<'a>, |
| 214 | } |
| 215 | |
| 216 | impl<'a> AnsiCodeIterator<'a> { |
| 217 | /// Creates a new ansi code iterator. |
| 218 | pub fn new(s: &'a str) -> AnsiCodeIterator<'a> { |
| 219 | AnsiCodeIterator { |
| 220 | s, |
| 221 | pending_item: None, |
| 222 | last_idx: 0, |
| 223 | cur_idx: 0, |
| 224 | iter: Matches::new(s), |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | /// Returns the string slice up to the current match. |
| 229 | pub fn current_slice(&self) -> &str { |
| 230 | &self.s[..self.cur_idx] |
| 231 | } |
| 232 | |
| 233 | /// Returns the string slice from the current match to the end. |
| 234 | pub fn rest_slice(&self) -> &str { |
| 235 | &self.s[self.cur_idx..] |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | impl<'a> Iterator for AnsiCodeIterator<'a> { |
| 240 | type Item = (&'a str, bool); |
| 241 | |
| 242 | fn next(&mut self) -> Option<(&'a str, bool)> { |
| 243 | if let Some(pending_item) = self.pending_item.take() { |
| 244 | self.cur_idx += pending_item.0.len(); |
| 245 | Some(pending_item) |
| 246 | } else if let Some(m) = self.iter.next() { |
| 247 | let s = &self.s[self.last_idx..m.start]; |
| 248 | self.last_idx = m.end; |
| 249 | if s.is_empty() { |
| 250 | self.cur_idx = m.end; |
| 251 | Some((m.as_str(), true)) |
| 252 | } else { |
| 253 | self.cur_idx = m.start; |
| 254 | self.pending_item = Some((m.as_str(), true)); |
| 255 | Some((s, false)) |
| 256 | } |
| 257 | } else if self.last_idx < self.s.len() { |
| 258 | let rv = &self.s[self.last_idx..]; |
| 259 | self.cur_idx = self.s.len(); |
| 260 | self.last_idx = self.s.len(); |
| 261 | Some((rv, false)) |
| 262 | } else { |
| 263 | None |
| 264 | } |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | impl FusedIterator for AnsiCodeIterator<'_> {} |
| 269 | |
#[cfg(test)]
mod tests {
    use super::*;

    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets the regex matches be compared directly against the dfa matches: two matches are
    // considered equal when they cover the same byte range
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                // Byte combinations that are not valid UTF-8 are skipped
                if let Ok(s) = std::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}
| 439 | |