1use anstyle_parse::state::state_change;
2use anstyle_parse::state::Action;
3use anstyle_parse::state::State;
4
5/// Strip ANSI escapes from a `&str`, returning the printable content
6///
7/// This can be used to take output from a program that includes escape sequences and write it
8/// somewhere that does not easily support them, such as a log file.
9///
10/// For non-contiguous data, see [`StripStr`].
11///
12/// # Example
13///
14/// ```rust
15/// use std::io::Write as _;
16///
17/// let styled_text = "\x1b[32mfoo\x1b[m bar";
18/// let plain_str = anstream::adapter::strip_str(&styled_text).to_string();
19/// assert_eq!(plain_str, "foo bar");
20/// ```
21#[inline]
22pub fn strip_str(data: &str) -> StrippedStr<'_> {
23 StrippedStr::new(data)
24}
25
26/// See [`strip_str`]
27#[derive(Default, Clone, Debug, PartialEq, Eq)]
28pub struct StrippedStr<'s> {
29 bytes: &'s [u8],
30 state: State,
31}
32
33impl<'s> StrippedStr<'s> {
34 #[inline]
35 fn new(data: &'s str) -> Self {
36 Self {
37 bytes: data.as_bytes(),
38 state: State::Ground,
39 }
40 }
41
42 /// Create a [`String`] of the printable content
43 #[inline]
44 #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
45 pub fn to_string(&self) -> String {
46 use std::fmt::Write as _;
47 let mut stripped: String = String::with_capacity(self.bytes.len());
48 let _ = write!(&mut stripped, "{}", self);
49 stripped
50 }
51}
52
53impl<'s> std::fmt::Display for StrippedStr<'s> {
54 /// **Note:** this does *not* exhaust the [`Iterator`]
55 #[inline]
56 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57 let iter: StrippedStr<'_> = Self {
58 bytes: self.bytes,
59 state: self.state,
60 };
61 for printable: &str in iter {
62 printable.fmt(f)?;
63 }
64 Ok(())
65 }
66}
67
68impl<'s> Iterator for StrippedStr<'s> {
69 type Item = &'s str;
70
71 #[inline]
72 fn next(&mut self) -> Option<Self::Item> {
73 next_str(&mut self.bytes, &mut self.state)
74 }
75}
76
77/// Incrementally strip non-contiguous data
78#[derive(Default, Clone, Debug, PartialEq, Eq)]
79pub struct StripStr {
80 state: State,
81}
82
83impl StripStr {
84 /// Initial state
85 pub fn new() -> Self {
86 Default::default()
87 }
88
89 /// Strip the next segment of data
90 pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91 StripStrIter {
92 bytes: data.as_bytes(),
93 state: &mut self.state,
94 }
95 }
96}
97
98/// See [`StripStr`]
99#[derive(Debug, PartialEq, Eq)]
100pub struct StripStrIter<'s> {
101 bytes: &'s [u8],
102 state: &'s mut State,
103}
104
105impl<'s> Iterator for StripStrIter<'s> {
106 type Item = &'s str;
107
108 #[inline]
109 fn next(&mut self) -> Option<Self::Item> {
110 next_str(&mut self.bytes, self.state)
111 }
112}
113
114#[inline]
115fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116 let offset = bytes.iter().copied().position(|b| {
117 let (next_state, action) = state_change(*state, b);
118 if next_state != State::Anywhere {
119 *state = next_state;
120 }
121 is_printable_str(action, b)
122 });
123 let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124 *bytes = next;
125 *state = State::Ground;
126
127 let offset = bytes.iter().copied().position(|b| {
128 let (_next_state, action) = state_change(State::Ground, b);
129 !is_printable_str(action, b)
130 });
131 let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132 *bytes = next;
133 if printable.is_empty() {
134 None
135 } else {
136 let printable = unsafe {
137 from_utf8_unchecked(
138 printable,
139 "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140 )
141 };
142 Some(printable)
143 }
144}
145
/// Convert `bytes` to a `&str`, skipping UTF-8 validation unless debug assertions are enabled
///
/// With debug assertions on, the bytes are still validated and a failure panics with
/// `safety_justification` as the message.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` states why the caller knows that it is.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8
        std::str::from_utf8_unchecked(bytes)
    }
}
155
156#[inline]
157fn is_printable_str(action: Action, byte: u8) -> bool {
158 // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
159 // ISO Latin-1, making it DEL and non-printable
160 const DEL: u8 = 0x7f;
161 (action == Action::Print && byte != DEL)
162 || action == Action::BeginUtf8
163 // since we know the input is valid UTF-8, the only thing we can do with
164 // continuations is to print them
165 || is_utf8_continuation(byte)
166 || (action == Action::Execute && byte.is_ascii_whitespace())
167}
168
/// Report whether `b` has the `10xxxxxx` bit pattern, i.e. lies in `0x80..=0xbf`
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    (b & 0b1100_0000) == 0b1000_0000
}
173
174/// Strip ANSI escapes from bytes, returning the printable content
175///
176/// This can be used to take output from a program that includes escape sequences and write it
177/// somewhere that does not easily support them, such as a log file.
178///
179/// # Example
180///
181/// ```rust
182/// use std::io::Write as _;
183///
184/// let styled_text = "\x1b[32mfoo\x1b[m bar";
185/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
186/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
187/// ```
188#[inline]
189pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
190 StrippedBytes::new(bytes:data)
191}
192
193/// See [`strip_bytes`]
194#[derive(Default, Clone, Debug, PartialEq, Eq)]
195pub struct StrippedBytes<'s> {
196 bytes: &'s [u8],
197 state: State,
198 utf8parser: Utf8Parser,
199}
200
201impl<'s> StrippedBytes<'s> {
202 /// See [`strip_bytes`]
203 #[inline]
204 pub fn new(bytes: &'s [u8]) -> Self {
205 Self {
206 bytes,
207 state: State::Ground,
208 utf8parser: Default::default(),
209 }
210 }
211
212 /// Strip the next slice of bytes
213 ///
214 /// Used when the content is in several non-contiguous slices
215 ///
216 /// # Panic
217 ///
218 /// May panic if it is not exhausted / empty
219 #[inline]
220 pub fn extend(&mut self, bytes: &'s [u8]) {
221 debug_assert!(
222 self.is_empty(),
223 "current bytes must be processed to ensure we end at the right state"
224 );
225 self.bytes = bytes;
226 }
227
228 /// Report the bytes has been exhausted
229 #[inline]
230 pub fn is_empty(&self) -> bool {
231 self.bytes.is_empty()
232 }
233
234 /// Create a [`Vec`] of the printable content
235 #[inline]
236 pub fn into_vec(self) -> Vec<u8> {
237 let mut stripped = Vec::with_capacity(self.bytes.len());
238 for printable in self {
239 stripped.extend(printable);
240 }
241 stripped
242 }
243}
244
245impl<'s> Iterator for StrippedBytes<'s> {
246 type Item = &'s [u8];
247
248 #[inline]
249 fn next(&mut self) -> Option<Self::Item> {
250 next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
251 }
252}
253
254/// Incrementally strip non-contiguous data
255#[derive(Default, Clone, Debug, PartialEq, Eq)]
256pub struct StripBytes {
257 state: State,
258 utf8parser: Utf8Parser,
259}
260
261impl StripBytes {
262 /// Initial state
263 pub fn new() -> Self {
264 Default::default()
265 }
266
267 /// Strip the next segment of data
268 pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
269 StripBytesIter {
270 bytes,
271 state: &mut self.state,
272 utf8parser: &mut self.utf8parser,
273 }
274 }
275}
276
277/// See [`StripBytes`]
278#[derive(Debug, PartialEq, Eq)]
279pub struct StripBytesIter<'s> {
280 bytes: &'s [u8],
281 state: &'s mut State,
282 utf8parser: &'s mut Utf8Parser,
283}
284
285impl<'s> Iterator for StripBytesIter<'s> {
286 type Item = &'s [u8];
287
288 #[inline]
289 fn next(&mut self) -> Option<Self::Item> {
290 next_bytes(&mut self.bytes, self.state, self.utf8parser)
291 }
292}
293
294#[inline]
295fn next_bytes<'s>(
296 bytes: &mut &'s [u8],
297 state: &mut State,
298 utf8parser: &mut Utf8Parser,
299) -> Option<&'s [u8]> {
300 let offset = bytes.iter().copied().position(|b| {
301 if *state == State::Utf8 {
302 true
303 } else {
304 let (next_state, action) = state_change(*state, b);
305 if next_state != State::Anywhere {
306 *state = next_state;
307 }
308 is_printable_bytes(action, b)
309 }
310 });
311 let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
312 *bytes = next;
313
314 let offset = bytes.iter().copied().position(|b| {
315 if *state == State::Utf8 {
316 if utf8parser.add(b) {
317 *state = State::Ground;
318 }
319 false
320 } else {
321 let (next_state, action) = state_change(State::Ground, b);
322 if next_state != State::Anywhere {
323 *state = next_state;
324 }
325 if *state == State::Utf8 {
326 utf8parser.add(b);
327 false
328 } else {
329 !is_printable_bytes(action, b)
330 }
331 }
332 });
333 let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
334 *bytes = next;
335 if printable.is_empty() {
336 None
337 } else {
338 Some(printable)
339 }
340}
341
342#[derive(Default, Clone, Debug, PartialEq, Eq)]
343pub struct Utf8Parser {
344 utf8_parser: utf8parse::Parser,
345}
346
347impl Utf8Parser {
348 fn add(&mut self, byte: u8) -> bool {
349 let mut b: bool = false;
350 let mut receiver: VtUtf8Receiver<'_> = VtUtf8Receiver(&mut b);
351 self.utf8_parser.advance(&mut receiver, byte);
352 b
353 }
354}
355
356struct VtUtf8Receiver<'a>(&'a mut bool);
357
358impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
359 fn codepoint(&mut self, _: char) {
360 *self.0 = true;
361 }
362
363 fn invalid_sequence(&mut self) {
364 *self.0 = true;
365 }
366}
367
368#[inline]
369fn is_printable_bytes(action: Action, byte: u8) -> bool {
370 // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
371 // ISO Latin-1, making it DEL and non-printable
372 const DEL: u8 = 0x7f;
373
374 // Continuations aren't included as they may also be control codes, requiring more context
375 (action == Action::Print && byte != DEL)
376 || action == Action::BeginUtf8
377 || (action == Action::Execute && byte.is_ascii_whitespace())
378}
379
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing, feeding one `char` at a time
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            let mut indices = s.char_indices();
            indices.next(); // current
            let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing, feeding one byte at a time
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}
514