1 | use std::{ |
2 | borrow::Cow, |
3 | iter::{FusedIterator, Peekable}, |
4 | str::CharIndices, |
5 | }; |
6 | |
/// States of the hand-written DFA that recognizes a single ANSI escape code.
///
/// The machine is fed one character at a time through [`State::transition`].
/// A candidate is a valid escape code if the machine was in an accepting
/// state (see [`State::is_final`]) at some point before reaching
/// [`State::Trap`].
#[derive(Debug, Clone, Copy)]
enum State {
    /// Nothing has been consumed yet.
    Start,
    /// The entry character (`\u{1b}` or `\u{9b}`) has been consumed.
    S1,
    /// A single `(` or `)` directly follows the entry character.
    S2,
    /// `(` or `)` followed by one of `0..=2`. Accepting; nothing may follow.
    S3,
    /// Inside a run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`).
    S4,
    /// First digit of a numeric run. Accepting.
    S5,
    /// Second digit of a numeric run. Accepting.
    S6,
    /// Third digit of a numeric run. Accepting.
    S7,
    /// Fourth digit of a numeric run. Accepting.
    S8,
    /// Fifth consecutive digit. Accepting; nothing may follow.
    S9,
    /// One or more `;` separators following a numeric run.
    S10,
    /// A terminating character has been consumed. Accepting; nothing may follow.
    S11,
    /// Dead state: the input can no longer extend the current candidate.
    Trap,
}

impl Default for State {
    /// The machine always begins in [`State::Start`].
    fn default() -> Self {
        Self::Start
    }
}

impl State {
    /// Returns `true` if the characters consumed so far form a complete escape code.
    fn is_final(&self) -> bool {
        // NOTE(review): `matches!` is deliberately not used here, presumably to
        // stay buildable on older toolchains -- confirm before removing the allow.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
            _ => false,
        }
    }

    /// Returns `true` once the machine has entered the dead state.
    fn is_trapped(&self) -> bool {
        // NOTE(review): see `is_final` for why `matches!` is avoided.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::Trap => true,
            _ => false,
        }
    }

    /// Advances the machine by one input character.
    ///
    /// Any `(state, character)` pair without an explicit arm below moves the
    /// machine to [`State::Trap`], where it stays for every later character.
    fn transition(&mut self, c: char) {
        *self = match c {
            // Entry characters are only valid as the very first character.
            '\u{1b}' | '\u{9b}' => match self {
                Self::Start => Self::S1,
                _ => Self::Trap,
            },
            '(' | ')' => match self {
                Self::S1 => Self::S2,
                Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // `;` is both a prefix character and the separator between numeric runs.
            ';' => match self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
                _ => Self::Trap,
            },
            '[' | '#' | '?' => match self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // `0..=2` differs from the other digits only right after a lone paren,
            // where it completes the short two-character form (`S3`).
            '0'..='2' => match self {
                Self::S1 | Self::S4 => Self::S5,
                Self::S2 => Self::S3,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                Self::S10 => Self::S5,
                _ => Self::Trap,
            },
            '3'..='9' => match self {
                Self::S1 | Self::S4 => Self::S5,
                Self::S2 => Self::S5,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                Self::S10 => Self::S5,
                _ => Self::Trap,
            },
            // Terminating characters.
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                match self {
                    Self::S1
                    | Self::S2
                    | Self::S4
                    | Self::S5
                    | Self::S6
                    | Self::S7
                    | Self::S8
                    | Self::S10 => Self::S11,
                    _ => Self::Trap,
                }
            }
            _ => Self::Trap,
        };
    }
}
105 | |
/// Iterator state for walking every ANSI escape code in a string.
#[derive(Debug)]
struct Matches<'a> {
    /// The full string being scanned; yielded matches borrow from it.
    s: &'a str,
    /// Cursor over `s`. It must be peekable so that the character which ends
    /// one scan can be inspected without being consumed.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a match iterator positioned at the start of `s`.
    fn new(s: &'a str) -> Self {
        let it = s.char_indices().peekable();
        Self { s, it }
    }
}
118 | |
/// One escape code located inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The string that was searched.
    text: &'a str,
    /// Byte offset at which the escape code begins.
    start: usize,
    /// Byte offset just past the escape code (exclusive).
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched portion of the searched string.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        let haystack: &'a str = self.text;
        let span = self.start..self.end;
        &haystack[span]
    }
}
132 | |
133 | impl<'a> Iterator for Matches<'a> { |
134 | type Item = Match<'a>; |
135 | |
136 | fn next(&mut self) -> Option<Self::Item> { |
137 | find_ansi_code_exclusive(&mut self.it).map(|(start: usize, end: usize)| Match { |
138 | text: self.s, |
139 | start, |
140 | end, |
141 | }) |
142 | } |
143 | } |
144 | |
145 | impl<'a> FusedIterator for Matches<'a> {} |
146 | |
147 | fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> { |
148 | 'outer: loop { |
149 | if let (start, ' \u{1b}' ) | (start, ' \u{9b}' ) = it.peek()? { |
150 | let start = *start; |
151 | let mut state = State::default(); |
152 | let mut maybe_end = None; |
153 | |
154 | loop { |
155 | let item = it.peek(); |
156 | |
157 | if let Some((idx, c)) = item { |
158 | state.transition(*c); |
159 | |
160 | if state.is_final() { |
161 | maybe_end = Some(*idx); |
162 | } |
163 | } |
164 | |
165 | // The match is greedy so run till we hit the trap state no matter what. A valid |
166 | // match is just one that was final at some point |
167 | if state.is_trapped() || item.is_none() { |
168 | match maybe_end { |
169 | Some(end) => { |
170 | // All possible final characters are a single byte so it's safe to make |
171 | // the end exclusive by just adding one |
172 | return Some((start, end + 1)); |
173 | } |
174 | // The character we are peeking right now might be the start of a match so |
175 | // we want to continue the loop without popping off that char |
176 | None => continue 'outer, |
177 | } |
178 | } |
179 | |
180 | it.next(); |
181 | } |
182 | } |
183 | |
184 | it.next(); |
185 | } |
186 | } |
187 | |
188 | /// Helper function to strip ansi codes. |
189 | pub fn strip_ansi_codes(s: &str) -> Cow<str> { |
190 | let mut char_it: impl Iterator = s.char_indices().peekable(); |
191 | match find_ansi_code_exclusive(&mut char_it) { |
192 | Some(_) => { |
193 | let stripped: String = AnsiCodeIteratorimpl Iterator ::new(s) |
194 | .filter_map(|(text: &str, is_ansi: bool)| if is_ansi { None } else { Some(text) }) |
195 | .collect(); |
196 | Cow::Owned(stripped) |
197 | } |
198 | None => Cow::Borrowed(s), |
199 | } |
200 | } |
201 | |
202 | /// An iterator over ansi codes in a string. |
203 | /// |
204 | /// This type can be used to scan over ansi codes in a string. |
205 | /// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of |
206 | /// the original string and `is_ansi` indicates if the slice contains |
207 | /// ansi codes or string values. |
208 | pub struct AnsiCodeIterator<'a> { |
209 | s: &'a str, |
210 | pending_item: Option<(&'a str, bool)>, |
211 | last_idx: usize, |
212 | cur_idx: usize, |
213 | iter: Matches<'a>, |
214 | } |
215 | |
216 | impl<'a> AnsiCodeIterator<'a> { |
217 | /// Creates a new ansi code iterator. |
218 | pub fn new(s: &'a str) -> AnsiCodeIterator<'a> { |
219 | AnsiCodeIterator { |
220 | s, |
221 | pending_item: None, |
222 | last_idx: 0, |
223 | cur_idx: 0, |
224 | iter: Matches::new(s), |
225 | } |
226 | } |
227 | |
228 | /// Returns the string slice up to the current match. |
229 | pub fn current_slice(&self) -> &str { |
230 | &self.s[..self.cur_idx] |
231 | } |
232 | |
233 | /// Returns the string slice from the current match to the end. |
234 | pub fn rest_slice(&self) -> &str { |
235 | &self.s[self.cur_idx..] |
236 | } |
237 | } |
238 | |
239 | impl<'a> Iterator for AnsiCodeIterator<'a> { |
240 | type Item = (&'a str, bool); |
241 | |
242 | fn next(&mut self) -> Option<(&'a str, bool)> { |
243 | if let Some(pending_item) = self.pending_item.take() { |
244 | self.cur_idx += pending_item.0.len(); |
245 | Some(pending_item) |
246 | } else if let Some(m) = self.iter.next() { |
247 | let s = &self.s[self.last_idx..m.start]; |
248 | self.last_idx = m.end; |
249 | if s.is_empty() { |
250 | self.cur_idx = m.end; |
251 | Some((m.as_str(), true)) |
252 | } else { |
253 | self.cur_idx = m.start; |
254 | self.pending_item = Some((m.as_str(), true)); |
255 | Some((s, false)) |
256 | } |
257 | } else if self.last_idx < self.s.len() { |
258 | let rv = &self.s[self.last_idx..]; |
259 | self.cur_idx = self.s.len(); |
260 | self.last_idx = self.s.len(); |
261 | Some((rv, false)) |
262 | } else { |
263 | None |
264 | } |
265 | } |
266 | } |
267 | |
268 | impl<'a> FusedIterator for AnsiCodeIterator<'a> {} |
269 | |
#[cfg(test)]
mod tests {
    use super::*;

    use lazy_static::lazy_static;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    lazy_static! {
        static ref STRIP_ANSI_RE: Regex = Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap();
    }

    // Lets `assert_eq!` compare regex matches against dfa matches by byte range only.
    impl<'a, 'b> PartialEq<Match<'a>> for regex::Match<'b> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                // Byte combinations that are not valid UTF-8 are skipped.
                if let Ok(s) = std::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}
439 | |