//! Parser for implementing virtual terminal emulators
//!
//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
//! state machine]. The state machine doesn't assign meaning to the parsed data
//! and is thus not itself sufficient for writing a terminal emulator. Instead,
//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
//!
//! # Examples
//!
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick way to see it in action is to
//! pipe `vim` into it
//!
//! ```sh
//! cargo build --release --example parselog
//! vim | target/release/examples/parselog
//! ```
//!
//! Just type `:q` to exit.
//!
//! # Differences from original state machine description
//!
//! * UTF-8 Support for Input
//! * OSC Strings can be terminated by 0x07
//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
//!   all states.
//!
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
|---|
| 31 | #![ cfg_attr(not(test), no_std)] | 
|---|
| 32 | #![ cfg_attr(docsrs, feature(doc_auto_cfg))] | 
|---|
| 33 | #![ allow(missing_docs)] | 
|---|
| 34 | #![ warn(clippy::print_stderr)] | 
|---|
| 35 | #![ warn(clippy::print_stdout)] | 
|---|
| 36 |  | 
|---|
| 37 | #[ cfg(not(feature = "core"))] | 
|---|
| 38 | extern crate alloc; | 
|---|
| 39 |  | 
|---|
| 40 | use core::mem::MaybeUninit; | 
|---|
| 41 |  | 
|---|
| 42 | #[ cfg(feature = "core")] | 
|---|
| 43 | use arrayvec::ArrayVec; | 
|---|
| 44 | #[ cfg(feature = "utf8")] | 
|---|
| 45 | use utf8parse as utf8; | 
|---|
| 46 |  | 
|---|
| 47 | mod params; | 
|---|
| 48 | pub mod state; | 
|---|
| 49 |  | 
|---|
| 50 | pub use params::{Params, ParamsIter}; | 
|---|
| 51 |  | 
|---|
| 52 | use state::{state_change, Action, State}; | 
|---|
| 53 |  | 
|---|
/// Number of intermediate bytes stored per sequence; any further one sets the `ignoring` flag
const MAX_INTERMEDIATES: usize = 2;
/// Number of OSC parameters whose byte ranges are tracked per OSC string
const MAX_OSC_PARAMS: usize = 16;
/// Capacity, in bytes, of the fixed-size OSC buffer used when the `core` feature is enabled
#[cfg(feature = "core")]
const MAX_OSC_RAW: usize = 1024;
|---|
| 58 |  | 
|---|
| 59 | /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] | 
|---|
| 60 | #[ allow(unused_qualifications)] | 
|---|
| 61 | #[ derive(Default, Clone, Debug, PartialEq, Eq)] | 
|---|
| 62 | pub struct Parser<C = DefaultCharAccumulator> { | 
|---|
| 63 | state: State, | 
|---|
| 64 | intermediates: [u8; MAX_INTERMEDIATES], | 
|---|
| 65 | intermediate_idx: usize, | 
|---|
| 66 | params: Params, | 
|---|
| 67 | param: u16, | 
|---|
| 68 | #[ cfg(feature = "core")] | 
|---|
| 69 | osc_raw: ArrayVec<u8, MAX_OSC_RAW>, | 
|---|
| 70 | #[ cfg(not(feature = "core"))] | 
|---|
| 71 | osc_raw: alloc::vec::Vec<u8>, | 
|---|
| 72 | osc_params: [(usize, usize); MAX_OSC_PARAMS], | 
|---|
| 73 | osc_num_params: usize, | 
|---|
| 74 | ignoring: bool, | 
|---|
| 75 | utf8_parser: C, | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | impl<C> Parser<C> | 
|---|
| 79 | where | 
|---|
| 80 | C: CharAccumulator, | 
|---|
| 81 | { | 
|---|
| 82 | /// Create a new Parser | 
|---|
| 83 | pub fn new() -> Parser { | 
|---|
| 84 | Parser::default() | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 | #[ inline] | 
|---|
| 88 | fn params(&self) -> &Params { | 
|---|
| 89 | &self.params | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | #[ inline] | 
|---|
| 93 | fn intermediates(&self) -> &[u8] { | 
|---|
| 94 | &self.intermediates[..self.intermediate_idx] | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | /// Advance the parser state | 
|---|
| 98 | /// | 
|---|
| 99 | /// Requires a [`Perform`] in case `byte` triggers an action | 
|---|
| 100 | #[ inline] | 
|---|
| 101 | pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { | 
|---|
| 102 | // Utf8 characters are handled out-of-band. | 
|---|
| 103 | if let State::Utf8 = self.state { | 
|---|
| 104 | self.process_utf8(performer, byte); | 
|---|
| 105 | return; | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | let (state, action) = state_change(self.state, byte); | 
|---|
| 109 | self.perform_state_change(performer, state, action, byte); | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | #[ inline] | 
|---|
| 113 | fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) | 
|---|
| 114 | where | 
|---|
| 115 | P: Perform, | 
|---|
| 116 | { | 
|---|
| 117 | if let Some(c) = self.utf8_parser.add(byte) { | 
|---|
| 118 | performer.print(c); | 
|---|
| 119 | self.state = State::Ground; | 
|---|
| 120 | } | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | #[ inline] | 
|---|
| 124 | fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) | 
|---|
| 125 | where | 
|---|
| 126 | P: Perform, | 
|---|
| 127 | { | 
|---|
| 128 | match state { | 
|---|
| 129 | State::Anywhere => { | 
|---|
| 130 | // Just run the action | 
|---|
| 131 | self.perform_action(performer, action, byte); | 
|---|
| 132 | } | 
|---|
| 133 | state => { | 
|---|
| 134 | match self.state { | 
|---|
| 135 | State::DcsPassthrough => { | 
|---|
| 136 | self.perform_action(performer, Action::Unhook, byte); | 
|---|
| 137 | } | 
|---|
| 138 | State::OscString => { | 
|---|
| 139 | self.perform_action(performer, Action::OscEnd, byte); | 
|---|
| 140 | } | 
|---|
| 141 | _ => (), | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | match action { | 
|---|
| 145 | Action::Nop => (), | 
|---|
| 146 | action => { | 
|---|
| 147 | self.perform_action(performer, action, byte); | 
|---|
| 148 | } | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | match state { | 
|---|
| 152 | State::CsiEntry | State::DcsEntry | State::Escape => { | 
|---|
| 153 | self.perform_action(performer, Action::Clear, byte); | 
|---|
| 154 | } | 
|---|
| 155 | State::DcsPassthrough => { | 
|---|
| 156 | self.perform_action(performer, Action::Hook, byte); | 
|---|
| 157 | } | 
|---|
| 158 | State::OscString => { | 
|---|
| 159 | self.perform_action(performer, Action::OscStart, byte); | 
|---|
| 160 | } | 
|---|
| 161 | _ => (), | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | // Assume the new state | 
|---|
| 165 | self.state = state; | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | /// Separate method for `osc_dispatch` that borrows self as read-only | 
|---|
| 171 | /// | 
|---|
| 172 | /// The aliasing is needed here for multiple slices into `self.osc_raw` | 
|---|
| 173 | #[ inline] | 
|---|
| 174 | fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { | 
|---|
| 175 | let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = | 
|---|
| 176 | unsafe { MaybeUninit::uninit().assume_init() }; | 
|---|
| 177 |  | 
|---|
| 178 | for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { | 
|---|
| 179 | let indices = self.osc_params[i]; | 
|---|
| 180 | *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); | 
|---|
| 181 | } | 
|---|
| 182 |  | 
|---|
| 183 | unsafe { | 
|---|
| 184 | let num_params = self.osc_num_params; | 
|---|
| 185 | let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; | 
|---|
| 186 | performer.osc_dispatch(&*params, byte == 0x07); | 
|---|
| 187 | } | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | #[ inline] | 
|---|
| 191 | fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { | 
|---|
| 192 | match action { | 
|---|
| 193 | Action::Print => performer.print(byte as char), | 
|---|
| 194 | Action::Execute => performer.execute(byte), | 
|---|
| 195 | Action::Hook => { | 
|---|
| 196 | if self.params.is_full() { | 
|---|
| 197 | self.ignoring = true; | 
|---|
| 198 | } else { | 
|---|
| 199 | self.params.push(self.param); | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | performer.hook(self.params(), self.intermediates(), self.ignoring, byte); | 
|---|
| 203 | } | 
|---|
| 204 | Action::Put => performer.put(byte), | 
|---|
| 205 | Action::OscStart => { | 
|---|
| 206 | self.osc_raw.clear(); | 
|---|
| 207 | self.osc_num_params = 0; | 
|---|
| 208 | } | 
|---|
| 209 | Action::OscPut => { | 
|---|
| 210 | #[ cfg(feature = "core")] | 
|---|
| 211 | { | 
|---|
| 212 | if self.osc_raw.is_full() { | 
|---|
| 213 | return; | 
|---|
| 214 | } | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | let idx = self.osc_raw.len(); | 
|---|
| 218 |  | 
|---|
| 219 | // Param separator | 
|---|
| 220 | if byte == b';'{ | 
|---|
| 221 | let param_idx = self.osc_num_params; | 
|---|
| 222 | match param_idx { | 
|---|
| 223 | // Only process up to MAX_OSC_PARAMS | 
|---|
| 224 | MAX_OSC_PARAMS => return, | 
|---|
| 225 |  | 
|---|
| 226 | // First param is special - 0 to current byte index | 
|---|
| 227 | 0 => { | 
|---|
| 228 | self.osc_params[param_idx] = (0, idx); | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 | // All other params depend on previous indexing | 
|---|
| 232 | _ => { | 
|---|
| 233 | let prev = self.osc_params[param_idx - 1]; | 
|---|
| 234 | let begin = prev.1; | 
|---|
| 235 | self.osc_params[param_idx] = (begin, idx); | 
|---|
| 236 | } | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | self.osc_num_params += 1; | 
|---|
| 240 | } else { | 
|---|
| 241 | self.osc_raw.push(byte); | 
|---|
| 242 | } | 
|---|
| 243 | } | 
|---|
| 244 | Action::OscEnd => { | 
|---|
| 245 | let param_idx = self.osc_num_params; | 
|---|
| 246 | let idx = self.osc_raw.len(); | 
|---|
| 247 |  | 
|---|
| 248 | match param_idx { | 
|---|
| 249 | // Finish last parameter if not already maxed | 
|---|
| 250 | MAX_OSC_PARAMS => (), | 
|---|
| 251 |  | 
|---|
| 252 | // First param is special - 0 to current byte index | 
|---|
| 253 | 0 => { | 
|---|
| 254 | self.osc_params[param_idx] = (0, idx); | 
|---|
| 255 | self.osc_num_params += 1; | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | // All other params depend on previous indexing | 
|---|
| 259 | _ => { | 
|---|
| 260 | let prev = self.osc_params[param_idx - 1]; | 
|---|
| 261 | let begin = prev.1; | 
|---|
| 262 | self.osc_params[param_idx] = (begin, idx); | 
|---|
| 263 | self.osc_num_params += 1; | 
|---|
| 264 | } | 
|---|
| 265 | } | 
|---|
| 266 | self.osc_dispatch(performer, byte); | 
|---|
| 267 | } | 
|---|
| 268 | Action::Unhook => performer.unhook(), | 
|---|
| 269 | Action::CsiDispatch => { | 
|---|
| 270 | if self.params.is_full() { | 
|---|
| 271 | self.ignoring = true; | 
|---|
| 272 | } else { | 
|---|
| 273 | self.params.push(self.param); | 
|---|
| 274 | } | 
|---|
| 275 |  | 
|---|
| 276 | performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); | 
|---|
| 277 | } | 
|---|
| 278 | Action::EscDispatch => { | 
|---|
| 279 | performer.esc_dispatch(self.intermediates(), self.ignoring, byte); | 
|---|
| 280 | } | 
|---|
| 281 | Action::Collect => { | 
|---|
| 282 | if self.intermediate_idx == MAX_INTERMEDIATES { | 
|---|
| 283 | self.ignoring = true; | 
|---|
| 284 | } else { | 
|---|
| 285 | self.intermediates[self.intermediate_idx] = byte; | 
|---|
| 286 | self.intermediate_idx += 1; | 
|---|
| 287 | } | 
|---|
| 288 | } | 
|---|
| 289 | Action::Param => { | 
|---|
| 290 | if self.params.is_full() { | 
|---|
| 291 | self.ignoring = true; | 
|---|
| 292 | return; | 
|---|
| 293 | } | 
|---|
| 294 |  | 
|---|
| 295 | if byte == b';'{ | 
|---|
| 296 | self.params.push(self.param); | 
|---|
| 297 | self.param = 0; | 
|---|
| 298 | } else if byte == b':'{ | 
|---|
| 299 | self.params.extend(self.param); | 
|---|
| 300 | self.param = 0; | 
|---|
| 301 | } else { | 
|---|
| 302 | // Continue collecting bytes into param | 
|---|
| 303 | self.param = self.param.saturating_mul(10); | 
|---|
| 304 | self.param = self.param.saturating_add((byte - b'0') as u16); | 
|---|
| 305 | } | 
|---|
| 306 | } | 
|---|
| 307 | Action::Clear => { | 
|---|
| 308 | // Reset everything on ESC/CSI/DCS entry | 
|---|
| 309 | self.intermediate_idx = 0; | 
|---|
| 310 | self.ignoring = false; | 
|---|
| 311 | self.param = 0; | 
|---|
| 312 |  | 
|---|
| 313 | self.params.clear(); | 
|---|
| 314 | } | 
|---|
| 315 | Action::BeginUtf8 => self.process_utf8(performer, byte), | 
|---|
| 316 | Action::Ignore => (), | 
|---|
| 317 | Action::Nop => (), | 
|---|
| 318 | } | 
|---|
| 319 | } | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
/// Incrementally assembles a `char` from bytes handed over one at a time
///
/// Implementors must be [`Default`]-constructible.
pub trait CharAccumulator: Default {
    /// Consume one more `byte` of the character being built
    ///
    /// Returns `None` while more data is needed, and `Some` once a character is available.
    fn add(&mut self, byte: u8) -> Option<char>;
}
|---|
| 329 |  | 
|---|
| 330 | /// Most flexible [`CharAccumulator`] for [`Parser`] based on active features | 
|---|
| 331 | #[ cfg(feature = "utf8")] | 
|---|
| 332 | pub type DefaultCharAccumulator = Utf8Parser; | 
|---|
| 333 | #[ cfg(not(feature = "utf8"))] | 
|---|
| 334 | pub type DefaultCharAccumulator = AsciiParser; | 
|---|
| 335 |  | 
|---|
/// [`CharAccumulator`] for input restricted to 7-bit ASCII
///
/// It carries no state.
#[allow(clippy::exhaustive_structs)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;
|---|
| 340 |  | 
|---|
| 341 | impl CharAccumulator for AsciiParser { | 
|---|
| 342 | fn add(&mut self, _byte: u8) -> Option<char> { | 
|---|
| 343 | unreachable!( "multi-byte UTF8 characters are unsupported") | 
|---|
| 344 | } | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
/// [`CharAccumulator`] that decodes UTF-8
///
/// Thin wrapper around the `utf8parse` decoder; only available with the `utf8` feature.
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Underlying incremental decoder
    utf8_parser: utf8::Parser,
}
|---|
| 353 |  | 
|---|
#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Feed one `byte` to the underlying UTF-8 decoder
    ///
    /// Returns whatever the decoder reported through the receiver for this byte: `Some` when
    /// a callback fired, `None` when the decoder needs more data.
    fn add(&mut self, byte: u8) -> Option<char> {
        // The slot is written by the receiver callbacks during `advance`.
        let mut c: Option<char> = None;
        let mut receiver = VtUtf8Receiver(&mut c);
        self.utf8_parser.advance(&mut receiver, byte);
        c
    }
}
|---|
| 363 |  | 
|---|
/// Receiver that stores what the UTF-8 decoder reports into the borrowed slot
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);
|---|
| 366 |  | 
|---|
#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    /// Store a successfully decoded code point in the slot
    fn codepoint(&mut self, c: char) {
        *self.0 = Some(c);
    }

    /// Store U+FFFD REPLACEMENT CHARACTER in the slot for an invalid sequence
    fn invalid_sequence(&mut self) {
        *self.0 = Some('\u{FFFD}');
    }
}
|---|
| 377 |  | 
|---|
| 378 | /// Performs actions requested by the [`Parser`] | 
|---|
| 379 | /// | 
|---|
| 380 | /// Actions in this case mean, for example, handling a CSI escape sequence describing cursor | 
|---|
| 381 | /// movement, or simply printing characters to the screen. | 
|---|
| 382 | /// | 
|---|
| 383 | /// The methods on this type correspond to actions described in | 
|---|
| 384 | /// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in | 
|---|
| 385 | /// a useful way in my own words for completeness, but the site should be | 
|---|
| 386 | /// referenced if something isn't clear. If the site disappears at some point in | 
|---|
| 387 | /// the future, consider checking archive.org. | 
|---|
| 388 | pub trait Perform { | 
|---|
| 389 | /// Draw a character to the screen and update states. | 
|---|
| 390 | fn print(&mut self, _c: char) {} | 
|---|
| 391 |  | 
|---|
| 392 | /// Execute a C0 or C1 control function. | 
|---|
| 393 | fn execute(&mut self, _byte: u8) {} | 
|---|
| 394 |  | 
|---|
| 395 | /// Invoked when a final character arrives in first part of device control string. | 
|---|
| 396 | /// | 
|---|
| 397 | /// The control function should be determined from the private marker, final character, and | 
|---|
| 398 | /// execute with a parameter list. A handler should be selected for remaining characters in the | 
|---|
| 399 | /// string; the handler function should subsequently be called by `put` for every character in | 
|---|
| 400 | /// the control string. | 
|---|
| 401 | /// | 
|---|
| 402 | /// The `ignore` flag indicates that more than two intermediates arrived and | 
|---|
| 403 | /// subsequent characters were ignored. | 
|---|
| 404 | fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {} | 
|---|
| 405 |  | 
|---|
| 406 | /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls | 
|---|
| 407 | /// will also be passed to the handler. | 
|---|
| 408 | fn put(&mut self, _byte: u8) {} | 
|---|
| 409 |  | 
|---|
| 410 | /// Called when a device control string is terminated. | 
|---|
| 411 | /// | 
|---|
| 412 | /// The previously selected handler should be notified that the DCS has | 
|---|
| 413 | /// terminated. | 
|---|
| 414 | fn unhook(&mut self) {} | 
|---|
| 415 |  | 
|---|
| 416 | /// Dispatch an operating system command. | 
|---|
| 417 | fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {} | 
|---|
| 418 |  | 
|---|
| 419 | /// A final character has arrived for a CSI sequence | 
|---|
| 420 | /// | 
|---|
| 421 | /// The `ignore` flag indicates that either more than two intermediates arrived | 
|---|
| 422 | /// or the number of parameters exceeded the maximum supported length, | 
|---|
| 423 | /// and subsequent characters were ignored. | 
|---|
| 424 | fn csi_dispatch( | 
|---|
| 425 | &mut self, | 
|---|
| 426 | _params: &Params, | 
|---|
| 427 | _intermediates: &[u8], | 
|---|
| 428 | _ignore: bool, | 
|---|
| 429 | _action: u8, | 
|---|
| 430 | ) { | 
|---|
| 431 | } | 
|---|
| 432 |  | 
|---|
| 433 | /// The final character of an escape sequence has arrived. | 
|---|
| 434 | /// | 
|---|
| 435 | /// The `ignore` flag indicates that more than two intermediates arrived and | 
|---|
| 436 | /// subsequent characters were ignored. | 
|---|
| 437 | fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} | 
|---|
| 438 | } | 
|---|
| 439 |  | 
|---|