1use anstyle_parse::state::state_change;
2use anstyle_parse::state::Action;
3use anstyle_parse::state::State;
4
5/// Strip ANSI escapes from a `&str`, returning the printable content
6///
7/// This can be used to take output from a program that includes escape sequences and write it
8/// somewhere that does not easily support them, such as a log file.
9///
10/// For non-contiguous data, see [`StripStr`].
11///
12/// # Example
13///
14/// ```rust
15/// use std::io::Write as _;
16///
17/// let styled_text = "\x1b[32mfoo\x1b[m bar";
18/// let plain_str = anstream::adapter::strip_str(&styled_text).to_string();
19/// assert_eq!(plain_str, "foo bar");
20/// ```
21#[inline]
22pub fn strip_str(data: &str) -> StrippedStr<'_> {
23 StrippedStr::new(data)
24}
25
26/// See [`strip_str`]
27#[derive(Default, Clone, Debug, PartialEq, Eq)]
28pub struct StrippedStr<'s> {
29 bytes: &'s [u8],
30 state: State,
31}
32
33impl<'s> StrippedStr<'s> {
34 #[inline]
35 fn new(data: &'s str) -> Self {
36 Self {
37 bytes: data.as_bytes(),
38 state: State::Ground,
39 }
40 }
41
42 /// Create a [`String`] of the printable content
43 #[inline]
44 #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
45 pub fn to_string(&self) -> String {
46 use std::fmt::Write as _;
47 let mut stripped: String = String::with_capacity(self.bytes.len());
48 let _ = write!(&mut stripped, "{}", self);
49 stripped
50 }
51}
52
53impl<'s> std::fmt::Display for StrippedStr<'s> {
54 /// **Note:** this does *not* exhaust the [`Iterator`]
55 #[inline]
56 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57 let iter: StrippedStr<'_> = Self {
58 bytes: self.bytes,
59 state: self.state,
60 };
61 for printable: &str in iter {
62 printable.fmt(f)?;
63 }
64 Ok(())
65 }
66}
67
68impl<'s> Iterator for StrippedStr<'s> {
69 type Item = &'s str;
70
71 #[inline]
72 fn next(&mut self) -> Option<Self::Item> {
73 next_str(&mut self.bytes, &mut self.state)
74 }
75}
76
77/// Incrementally strip non-contiguous data
78#[derive(Default, Clone, Debug, PartialEq, Eq)]
79pub struct StripStr {
80 state: State,
81}
82
83impl StripStr {
84 /// Initial state
85 pub fn new() -> Self {
86 Default::default()
87 }
88
89 /// Strip the next segment of data
90 pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91 StripStrIter {
92 bytes: data.as_bytes(),
93 state: &mut self.state,
94 }
95 }
96}
97
98/// See [`StripStr`]
99#[derive(Debug, PartialEq, Eq)]
100pub struct StripStrIter<'s> {
101 bytes: &'s [u8],
102 state: &'s mut State,
103}
104
105impl<'s> Iterator for StripStrIter<'s> {
106 type Item = &'s str;
107
108 #[inline]
109 fn next(&mut self) -> Option<Self::Item> {
110 next_str(&mut self.bytes, self.state)
111 }
112}
113
114#[inline]
115fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116 let offset = bytes.iter().copied().position(|b| {
117 let (next_state, action) = state_change(*state, b);
118 if next_state != State::Anywhere {
119 *state = next_state;
120 }
121 is_printable_bytes(action, b)
122 });
123 let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124 *bytes = next;
125 *state = State::Ground;
126
127 let offset = bytes.iter().copied().position(|b| {
128 let (_next_state, action) = state_change(State::Ground, b);
129 !(is_printable_bytes(action, b) || is_utf8_continuation(b))
130 });
131 let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132 *bytes = next;
133 if printable.is_empty() {
134 None
135 } else {
136 let printable = unsafe {
137 from_utf8_unchecked(
138 printable,
139 "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140 )
141 };
142 Some(printable)
143 }
144}
145
/// Convert `bytes` to a `&str`, skipping UTF-8 validation in builds without debug assertions
///
/// With debug assertions enabled the bytes are validated and invalid input panics with
/// `safety_justification` as the message, so a broken invariant shows up early in testing.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
155
/// Whether `b` has the `10xxxxxx` bit pattern, i.e. lies in `0x80..=0xbf`
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    const TOP_TWO_BITS: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;
    b & TOP_TWO_BITS == CONTINUATION_TAG
}
160
161/// Strip ANSI escapes from bytes, returning the printable content
162///
163/// This can be used to take output from a program that includes escape sequences and write it
164/// somewhere that does not easily support them, such as a log file.
165///
166/// # Example
167///
168/// ```rust
169/// use std::io::Write as _;
170///
171/// let styled_text = "\x1b[32mfoo\x1b[m bar";
172/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
173/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
174/// ```
175#[inline]
176pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
177 StrippedBytes::new(bytes:data)
178}
179
180/// See [`strip_bytes`]
181#[derive(Default, Clone, Debug, PartialEq, Eq)]
182pub struct StrippedBytes<'s> {
183 bytes: &'s [u8],
184 state: State,
185 utf8parser: Utf8Parser,
186}
187
188impl<'s> StrippedBytes<'s> {
189 /// See [`strip_bytes`]
190 #[inline]
191 pub fn new(bytes: &'s [u8]) -> Self {
192 Self {
193 bytes,
194 state: State::Ground,
195 utf8parser: Default::default(),
196 }
197 }
198
199 /// Strip the next slice of bytes
200 ///
201 /// Used when the content is in several non-contiguous slices
202 ///
203 /// # Panic
204 ///
205 /// May panic if it is not exhausted / empty
206 #[inline]
207 pub fn extend(&mut self, bytes: &'s [u8]) {
208 debug_assert!(
209 self.is_empty(),
210 "current bytes must be processed to ensure we end at the right state"
211 );
212 self.bytes = bytes;
213 }
214
215 /// Report the bytes has been exhausted
216 #[inline]
217 pub fn is_empty(&self) -> bool {
218 self.bytes.is_empty()
219 }
220
221 /// Create a [`Vec`] of the printable content
222 #[inline]
223 pub fn into_vec(self) -> Vec<u8> {
224 let mut stripped = Vec::with_capacity(self.bytes.len());
225 for printable in self {
226 stripped.extend(printable);
227 }
228 stripped
229 }
230}
231
232impl<'s> Iterator for StrippedBytes<'s> {
233 type Item = &'s [u8];
234
235 #[inline]
236 fn next(&mut self) -> Option<Self::Item> {
237 next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
238 }
239}
240
241/// Incrementally strip non-contiguous data
242#[derive(Default, Clone, Debug, PartialEq, Eq)]
243pub struct StripBytes {
244 state: State,
245 utf8parser: Utf8Parser,
246}
247
248impl StripBytes {
249 /// Initial state
250 pub fn new() -> Self {
251 Default::default()
252 }
253
254 /// Strip the next segment of data
255 pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
256 StripBytesIter {
257 bytes,
258 state: &mut self.state,
259 utf8parser: &mut self.utf8parser,
260 }
261 }
262}
263
264/// See [`StripBytes`]
265#[derive(Debug, PartialEq, Eq)]
266pub struct StripBytesIter<'s> {
267 bytes: &'s [u8],
268 state: &'s mut State,
269 utf8parser: &'s mut Utf8Parser,
270}
271
272impl<'s> Iterator for StripBytesIter<'s> {
273 type Item = &'s [u8];
274
275 #[inline]
276 fn next(&mut self) -> Option<Self::Item> {
277 next_bytes(&mut self.bytes, self.state, self.utf8parser)
278 }
279}
280
281#[inline]
282fn next_bytes<'s>(
283 bytes: &mut &'s [u8],
284 state: &mut State,
285 utf8parser: &mut Utf8Parser,
286) -> Option<&'s [u8]> {
287 let offset = bytes.iter().copied().position(|b| {
288 if *state == State::Utf8 {
289 true
290 } else {
291 let (next_state, action) = state_change(*state, b);
292 if next_state != State::Anywhere {
293 *state = next_state;
294 }
295 is_printable_bytes(action, b)
296 }
297 });
298 let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
299 *bytes = next;
300
301 let offset = bytes.iter().copied().position(|b| {
302 if *state == State::Utf8 {
303 if utf8parser.add(b) {
304 *state = State::Ground;
305 }
306 false
307 } else {
308 let (next_state, action) = state_change(State::Ground, b);
309 if next_state != State::Anywhere {
310 *state = next_state;
311 }
312 if *state == State::Utf8 {
313 utf8parser.add(b);
314 false
315 } else {
316 !is_printable_bytes(action, b)
317 }
318 }
319 });
320 let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
321 *bytes = next;
322 if printable.is_empty() {
323 None
324 } else {
325 Some(printable)
326 }
327}
328
329#[derive(Default, Clone, Debug, PartialEq, Eq)]
330pub struct Utf8Parser {
331 utf8_parser: utf8parse::Parser,
332}
333
334impl Utf8Parser {
335 fn add(&mut self, byte: u8) -> bool {
336 let mut b: bool = false;
337 let mut receiver: VtUtf8Receiver<'_> = VtUtf8Receiver(&mut b);
338 self.utf8_parser.advance(&mut receiver, byte);
339 b
340 }
341}
342
343struct VtUtf8Receiver<'a>(&'a mut bool);
344
345impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
346 fn codepoint(&mut self, _: char) {
347 *self.0 = true;
348 }
349
350 fn invalid_sequence(&mut self) {
351 *self.0 = true;
352 }
353}
354
355#[inline]
356fn is_printable_bytes(action: Action, byte: u8) -> bool {
357 // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
358 // ISO Latin-1, making it DEL and non-printable
359 const DEL: u8 = 0x7f;
360
361 // Continuations aren't included as they may also be control codes, requiring more context
362 (action == Action::Print && byte != DEL)
363 || action == Action::BeginUtf8
364 || (action == Action::Execute && byte.is_ascii_whitespace())
365}
366
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing, feeding one `char` at a time
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            let mut indices = s.char_indices();
            indices.next(); // current
            let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing, feeding one byte at a time
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for start in 0..s.len() {
            let current = &s[start..=start];
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_handles_broken_sequence() {
        // valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
        let s = "ö\x1b😀hello😀goodbye";
        let mut it = strip_str(s);
        assert_eq!("ö", it.next().unwrap());
        assert_eq!("ello😀goodbye", it.next().unwrap());
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}
510