| 1 | //! Parser for implementing virtual terminal emulators |
| 2 | //! |
| 3 | //! [`Parser`] is implemented according to [Paul Williams' ANSI parser |
| 4 | //! state machine]. The state machine doesn't assign meaning to the parsed data |
| 5 | //! and is thus not itself sufficient for writing a terminal emulator. Instead, |
| 6 | //! it is expected that an implementation of [`Perform`] is provided which does |
| 7 | //! something useful with the parsed data. The [`Parser`] handles the book |
| 8 | //! keeping, and the [`Perform`] gets to simply handle actions. |
| 9 | //! |
| 10 | //! # Examples |
| 11 | //! |
//! For an example of using the [`Parser`], please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. A quick way to see it in action is to
//! pipe `vim` into it:
| 15 | //! |
| 16 | //! ```sh |
| 17 | //! cargo build --release --example parselog |
| 18 | //! vim | target/release/examples/parselog |
| 19 | //! ``` |
| 20 | //! |
| 21 | //! Just type `:q` to exit. |
| 22 | //! |
| 23 | //! # Differences from original state machine description |
| 24 | //! |
| 25 | //! * UTF-8 Support for Input |
| 26 | //! * OSC Strings can be terminated by 0x07 |
| 27 | //! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in |
| 28 | //! all states. |
| 29 | //! |
| 30 | //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser |
| 31 | #![cfg_attr (not(test), no_std)] |
| 32 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
| 33 | #![allow (missing_docs)] |
| 34 | #![warn (clippy::print_stderr)] |
| 35 | #![warn (clippy::print_stdout)] |
| 36 | |
| 37 | #[cfg (not(feature = "core" ))] |
| 38 | extern crate alloc; |
| 39 | |
| 40 | use core::mem::MaybeUninit; |
| 41 | |
| 42 | #[cfg (feature = "core" )] |
| 43 | use arrayvec::ArrayVec; |
| 44 | #[cfg (feature = "utf8" )] |
| 45 | use utf8parse as utf8; |
| 46 | |
| 47 | mod params; |
| 48 | pub mod state; |
| 49 | |
| 50 | pub use params::{Params, ParamsIter}; |
| 51 | |
| 52 | use state::{state_change, Action, State}; |
| 53 | |
/// Maximum number of intermediate bytes stored per sequence; any further one only sets the
/// parser's `ignoring` flag.
const MAX_INTERMEDIATES: usize = 2;
/// Maximum number of OSC parameters whose ranges are recorded.
const MAX_OSC_PARAMS: usize = 16;
/// Capacity, in bytes, of the fixed-size OSC buffer used when the `core` feature is enabled.
#[cfg (feature = "core" )]
const MAX_OSC_RAW: usize = 1024;
| 58 | |
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// The type parameter `C` is the [`CharAccumulator`] that is handed the bytes of multi-byte
/// characters; it defaults to [`DefaultCharAccumulator`].
// NOTE(review): the `unused_qualifications` allow presumably covers the fully qualified
// `alloc::vec::Vec` field type below — confirm before removing it.
#[allow (unused_qualifications)]
#[derive (Default, Clone, Debug, PartialEq, Eq)]
pub struct Parser<C = DefaultCharAccumulator> {
    /// State the machine is currently in.
    state: State,
    /// Storage for collected intermediate bytes; only the first `intermediate_idx` are valid.
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of bytes currently stored in `intermediates`.
    intermediate_idx: usize,
    /// Parameters completed so far for the sequence in progress.
    params: Params,
    /// Value of the parameter currently being accumulated from digit bytes.
    param: u16,
    /// OSC payload bytes with the `;` separators left out (fixed-capacity variant).
    #[cfg (feature = "core" )]
    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
    /// OSC payload bytes with the `;` separators left out (heap-allocated variant).
    #[cfg (not(feature = "core" ))]
    osc_raw: alloc::vec::Vec<u8>,
    /// `(start, end)` byte ranges into `osc_raw`, one per recorded OSC parameter.
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
    /// Number of valid entries in `osc_params`.
    osc_num_params: usize,
    /// Set when intermediates or parameters overflowed; forwarded to the performer as `ignore`.
    ignoring: bool,
    /// Accumulator that assembles bytes into a `char`.
    utf8_parser: C,
}
| 77 | |
| 78 | impl<C> Parser<C> |
| 79 | where |
| 80 | C: CharAccumulator, |
| 81 | { |
| 82 | /// Create a new Parser |
| 83 | pub fn new() -> Parser { |
| 84 | Parser::default() |
| 85 | } |
| 86 | |
| 87 | #[inline ] |
| 88 | fn params(&self) -> &Params { |
| 89 | &self.params |
| 90 | } |
| 91 | |
| 92 | #[inline ] |
| 93 | fn intermediates(&self) -> &[u8] { |
| 94 | &self.intermediates[..self.intermediate_idx] |
| 95 | } |
| 96 | |
| 97 | /// Advance the parser state |
| 98 | /// |
| 99 | /// Requires a [`Perform`] in case `byte` triggers an action |
| 100 | #[inline ] |
| 101 | pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
| 102 | // Utf8 characters are handled out-of-band. |
| 103 | if let State::Utf8 = self.state { |
| 104 | self.process_utf8(performer, byte); |
| 105 | return; |
| 106 | } |
| 107 | |
| 108 | let (state, action) = state_change(self.state, byte); |
| 109 | self.perform_state_change(performer, state, action, byte); |
| 110 | } |
| 111 | |
| 112 | #[inline ] |
| 113 | fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) |
| 114 | where |
| 115 | P: Perform, |
| 116 | { |
| 117 | if let Some(c) = self.utf8_parser.add(byte) { |
| 118 | performer.print(c); |
| 119 | self.state = State::Ground; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | #[inline ] |
| 124 | fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) |
| 125 | where |
| 126 | P: Perform, |
| 127 | { |
| 128 | match state { |
| 129 | State::Anywhere => { |
| 130 | // Just run the action |
| 131 | self.perform_action(performer, action, byte); |
| 132 | } |
| 133 | state => { |
| 134 | match self.state { |
| 135 | State::DcsPassthrough => { |
| 136 | self.perform_action(performer, Action::Unhook, byte); |
| 137 | } |
| 138 | State::OscString => { |
| 139 | self.perform_action(performer, Action::OscEnd, byte); |
| 140 | } |
| 141 | _ => (), |
| 142 | } |
| 143 | |
| 144 | match action { |
| 145 | Action::Nop => (), |
| 146 | action => { |
| 147 | self.perform_action(performer, action, byte); |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | match state { |
| 152 | State::CsiEntry | State::DcsEntry | State::Escape => { |
| 153 | self.perform_action(performer, Action::Clear, byte); |
| 154 | } |
| 155 | State::DcsPassthrough => { |
| 156 | self.perform_action(performer, Action::Hook, byte); |
| 157 | } |
| 158 | State::OscString => { |
| 159 | self.perform_action(performer, Action::OscStart, byte); |
| 160 | } |
| 161 | _ => (), |
| 162 | } |
| 163 | |
| 164 | // Assume the new state |
| 165 | self.state = state; |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | /// Separate method for `osc_dispatch` that borrows self as read-only |
| 171 | /// |
| 172 | /// The aliasing is needed here for multiple slices into `self.osc_raw` |
| 173 | #[inline ] |
| 174 | fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { |
| 175 | let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = |
| 176 | unsafe { MaybeUninit::uninit().assume_init() }; |
| 177 | |
| 178 | for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { |
| 179 | let indices = self.osc_params[i]; |
| 180 | *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); |
| 181 | } |
| 182 | |
| 183 | unsafe { |
| 184 | let num_params = self.osc_num_params; |
| 185 | let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; |
| 186 | performer.osc_dispatch(&*params, byte == 0x07); |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | #[inline ] |
| 191 | fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { |
| 192 | match action { |
| 193 | Action::Print => performer.print(byte as char), |
| 194 | Action::Execute => performer.execute(byte), |
| 195 | Action::Hook => { |
| 196 | if self.params.is_full() { |
| 197 | self.ignoring = true; |
| 198 | } else { |
| 199 | self.params.push(self.param); |
| 200 | } |
| 201 | |
| 202 | performer.hook(self.params(), self.intermediates(), self.ignoring, byte); |
| 203 | } |
| 204 | Action::Put => performer.put(byte), |
| 205 | Action::OscStart => { |
| 206 | self.osc_raw.clear(); |
| 207 | self.osc_num_params = 0; |
| 208 | } |
| 209 | Action::OscPut => { |
| 210 | #[cfg (feature = "core" )] |
| 211 | { |
| 212 | if self.osc_raw.is_full() { |
| 213 | return; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | let idx = self.osc_raw.len(); |
| 218 | |
| 219 | // Param separator |
| 220 | if byte == b';' { |
| 221 | let param_idx = self.osc_num_params; |
| 222 | match param_idx { |
| 223 | // Only process up to MAX_OSC_PARAMS |
| 224 | MAX_OSC_PARAMS => return, |
| 225 | |
| 226 | // First param is special - 0 to current byte index |
| 227 | 0 => { |
| 228 | self.osc_params[param_idx] = (0, idx); |
| 229 | } |
| 230 | |
| 231 | // All other params depend on previous indexing |
| 232 | _ => { |
| 233 | let prev = self.osc_params[param_idx - 1]; |
| 234 | let begin = prev.1; |
| 235 | self.osc_params[param_idx] = (begin, idx); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | self.osc_num_params += 1; |
| 240 | } else { |
| 241 | self.osc_raw.push(byte); |
| 242 | } |
| 243 | } |
| 244 | Action::OscEnd => { |
| 245 | let param_idx = self.osc_num_params; |
| 246 | let idx = self.osc_raw.len(); |
| 247 | |
| 248 | match param_idx { |
| 249 | // Finish last parameter if not already maxed |
| 250 | MAX_OSC_PARAMS => (), |
| 251 | |
| 252 | // First param is special - 0 to current byte index |
| 253 | 0 => { |
| 254 | self.osc_params[param_idx] = (0, idx); |
| 255 | self.osc_num_params += 1; |
| 256 | } |
| 257 | |
| 258 | // All other params depend on previous indexing |
| 259 | _ => { |
| 260 | let prev = self.osc_params[param_idx - 1]; |
| 261 | let begin = prev.1; |
| 262 | self.osc_params[param_idx] = (begin, idx); |
| 263 | self.osc_num_params += 1; |
| 264 | } |
| 265 | } |
| 266 | self.osc_dispatch(performer, byte); |
| 267 | } |
| 268 | Action::Unhook => performer.unhook(), |
| 269 | Action::CsiDispatch => { |
| 270 | if self.params.is_full() { |
| 271 | self.ignoring = true; |
| 272 | } else { |
| 273 | self.params.push(self.param); |
| 274 | } |
| 275 | |
| 276 | performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); |
| 277 | } |
| 278 | Action::EscDispatch => { |
| 279 | performer.esc_dispatch(self.intermediates(), self.ignoring, byte); |
| 280 | } |
| 281 | Action::Collect => { |
| 282 | if self.intermediate_idx == MAX_INTERMEDIATES { |
| 283 | self.ignoring = true; |
| 284 | } else { |
| 285 | self.intermediates[self.intermediate_idx] = byte; |
| 286 | self.intermediate_idx += 1; |
| 287 | } |
| 288 | } |
| 289 | Action::Param => { |
| 290 | if self.params.is_full() { |
| 291 | self.ignoring = true; |
| 292 | return; |
| 293 | } |
| 294 | |
| 295 | if byte == b';' { |
| 296 | self.params.push(self.param); |
| 297 | self.param = 0; |
| 298 | } else if byte == b':' { |
| 299 | self.params.extend(self.param); |
| 300 | self.param = 0; |
| 301 | } else { |
| 302 | // Continue collecting bytes into param |
| 303 | self.param = self.param.saturating_mul(10); |
| 304 | self.param = self.param.saturating_add((byte - b'0' ) as u16); |
| 305 | } |
| 306 | } |
| 307 | Action::Clear => { |
| 308 | // Reset everything on ESC/CSI/DCS entry |
| 309 | self.intermediate_idx = 0; |
| 310 | self.ignoring = false; |
| 311 | self.param = 0; |
| 312 | |
| 313 | self.params.clear(); |
| 314 | } |
| 315 | Action::BeginUtf8 => self.process_utf8(performer, byte), |
| 316 | Action::Ignore => (), |
| 317 | Action::Nop => (), |
| 318 | } |
| 319 | } |
| 320 | } |
| 321 | |
/// Build a `char` out of bytes
///
/// [`Parser`] hands an implementation one byte at a time and prints the `char` it returns.
/// `Default` is a supertrait, so an accumulator can always be created without arguments.
pub trait CharAccumulator: Default {
    /// Consume one more byte of the character being assembled
    ///
    /// Return `Some` once a `char` is complete and `None` when more data is needed
    fn add(&mut self, byte: u8) -> Option<char>;
}
| 329 | |
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// With the `utf8` feature enabled this is [`Utf8Parser`].
#[cfg (feature = "utf8" )]
pub type DefaultCharAccumulator = Utf8Parser;
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// Without the `utf8` feature this is [`AsciiParser`].
#[cfg (not(feature = "utf8" ))]
pub type DefaultCharAccumulator = AsciiParser;
| 335 | |
/// Only allow parsing 7-bit ASCII
///
/// This accumulator carries no state. Its [`CharAccumulator::add`] implementation panics
/// instead of assembling multi-byte characters.
// NOTE(review): `exhaustive_structs` is presumably allowed because this is a field-less unit
// struct — confirm.
#[allow (clippy::exhaustive_structs)]
#[derive (Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;
| 340 | |
impl CharAccumulator for AsciiParser {
    /// Never produces a `char`
    ///
    /// # Panics
    ///
    /// Always, via `unreachable!`: multi-byte UTF-8 characters are unsupported by this
    /// accumulator.
    // NOTE(review): whether the state table can still route a byte here while this
    // accumulator is in use is not visible from this file — confirm.
    fn add(&mut self, _byte: u8) -> Option<char> {
        unreachable!("multi-byte UTF8 characters are unsupported" )
    }
}
| 346 | |
/// Allow parsing UTF-8
///
/// Wraps the `utf8parse` parser (imported as `utf8`) so it can be used as a
/// [`CharAccumulator`].
#[cfg (feature = "utf8" )]
#[derive (Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Underlying byte-at-a-time UTF-8 parser.
    utf8_parser: utf8::Parser,
}
| 353 | |
#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Feed one byte to the underlying UTF-8 parser
    ///
    /// Returns `Some` when this byte made the parser report a result through
    /// [`VtUtf8Receiver`] (a decoded character, or the replacement character for an invalid
    /// sequence), and `None` while more bytes are needed.
    fn add(&mut self, byte: u8) -> Option<char> {
        // The receiver borrows `c` and fills it in if the parser calls back during `advance`.
        // The annotation must name the full type: a bare `Option` does not compile.
        let mut c: Option<char> = None;
        let mut receiver = VtUtf8Receiver(&mut c);
        self.utf8_parser.advance(&mut receiver, byte);
        c
    }
}
| 363 | |
/// Receiver that writes whatever the UTF-8 parser reports into the borrowed `Option<char>`
#[cfg (feature = "utf8" )]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);
| 366 | |
#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    /// Store a successfully decoded character in the borrowed slot
    fn codepoint(&mut self, ch: char) {
        *self.0 = Some(ch);
    }

    /// Store the replacement character (U+FFFD) in place of a malformed sequence
    fn invalid_sequence(&mut self) {
        self.codepoint('�');
    }
}
| 377 | |
/// Performs actions requested by the [`Parser`]
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
///
/// Every method has an empty default body, so an implementation only needs to override the
/// callbacks it is interested in.
pub trait Perform {
    /// Draw a character to the screen and update states.
    fn print(&mut self, _c: char) {}

    /// Execute a C0 or C1 control function.
    fn execute(&mut self, _byte: u8) {}

    /// Invoked when a final character arrives in first part of device control string.
    ///
    /// The control function should be determined from the private marker, final character, and
    /// execute with a parameter list. A handler should be selected for remaining characters in the
    /// string; the handler function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    ///
    /// `_action` is the byte that triggered the hook.
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}

    /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
    /// will also be passed to the handler.
    fn put(&mut self, _byte: u8) {}

    /// Called when a device control string is terminated.
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self) {}

    /// Dispatch an operating system command.
    ///
    /// `_params` holds the command's payload split at each `;`, in order of arrival.
    /// `_bell_terminated` is `true` when the byte that ended the string was BEL (`0x07`).
    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}

    /// A final character has arrived for a CSI sequence
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    ///
    /// `_action` is the final byte of the sequence.
    fn csi_dispatch(
        &mut self,
        _params: &Params,
        _intermediates: &[u8],
        _ignore: bool,
        _action: u8,
    ) {
    }

    /// The final character of an escape sequence has arrived.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}
| 439 | |