1//! Parser for implementing virtual terminal emulators
2//!
3//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
4//! state machine]. The state machine doesn't assign meaning to the parsed data
5//! and is thus not itself sufficient for writing a terminal emulator. Instead,
6//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9//!
10//! # Examples
11//!
//! For an example of using the [`Parser`], please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. A quick way to see it in action is to
//! pipe `vim` into it:
15//!
16//! ```sh
17//! cargo build --release --example parselog
18//! vim | target/release/examples/parselog
19//! ```
20//!
21//! Just type `:q` to exit.
22//!
23//! # Differences from original state machine description
24//!
25//! * UTF-8 Support for Input
26//! * OSC Strings can be terminated by 0x07
27//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
28//! all states.
29//!
30//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
31#![cfg_attr(not(test), no_std)]
32#![cfg_attr(docsrs, feature(doc_auto_cfg))]
33#![allow(missing_docs)]
34#![warn(clippy::print_stderr)]
35#![warn(clippy::print_stdout)]
36
37#[cfg(not(feature = "core"))]
38extern crate alloc;
39
40use core::mem::MaybeUninit;
41
42#[cfg(feature = "core")]
43use arrayvec::ArrayVec;
44#[cfg(feature = "utf8")]
45use utf8parse as utf8;
46
47mod params;
48pub mod state;
49
50pub use params::{Params, ParamsIter};
51
52use state::{state_change, Action, State};
53
54const MAX_INTERMEDIATES: usize = 2;
55const MAX_OSC_PARAMS: usize = 16;
56#[cfg(feature = "core")]
57const MAX_OSC_RAW: usize = 1024;
58
59/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
60#[allow(unused_qualifications)]
61#[derive(Default, Clone, Debug, PartialEq, Eq)]
62pub struct Parser<C = DefaultCharAccumulator> {
63 state: State,
64 intermediates: [u8; MAX_INTERMEDIATES],
65 intermediate_idx: usize,
66 params: Params,
67 param: u16,
68 #[cfg(feature = "core")]
69 osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
70 #[cfg(not(feature = "core"))]
71 osc_raw: alloc::vec::Vec<u8>,
72 osc_params: [(usize, usize); MAX_OSC_PARAMS],
73 osc_num_params: usize,
74 ignoring: bool,
75 utf8_parser: C,
76}
77
78impl<C> Parser<C>
79where
80 C: CharAccumulator,
81{
82 /// Create a new Parser
83 pub fn new() -> Parser {
84 Parser::default()
85 }
86
87 #[inline]
88 fn params(&self) -> &Params {
89 &self.params
90 }
91
92 #[inline]
93 fn intermediates(&self) -> &[u8] {
94 &self.intermediates[..self.intermediate_idx]
95 }
96
97 /// Advance the parser state
98 ///
99 /// Requires a [`Perform`] in case `byte` triggers an action
100 #[inline]
101 pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
102 // Utf8 characters are handled out-of-band.
103 if let State::Utf8 = self.state {
104 self.process_utf8(performer, byte);
105 return;
106 }
107
108 let (state, action) = state_change(self.state, byte);
109 self.perform_state_change(performer, state, action, byte);
110 }
111
112 #[inline]
113 fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
114 where
115 P: Perform,
116 {
117 if let Some(c) = self.utf8_parser.add(byte) {
118 performer.print(c);
119 self.state = State::Ground;
120 }
121 }
122
123 #[inline]
124 fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
125 where
126 P: Perform,
127 {
128 match state {
129 State::Anywhere => {
130 // Just run the action
131 self.perform_action(performer, action, byte);
132 }
133 state => {
134 match self.state {
135 State::DcsPassthrough => {
136 self.perform_action(performer, Action::Unhook, byte);
137 }
138 State::OscString => {
139 self.perform_action(performer, Action::OscEnd, byte);
140 }
141 _ => (),
142 }
143
144 match action {
145 Action::Nop => (),
146 action => {
147 self.perform_action(performer, action, byte);
148 }
149 }
150
151 match state {
152 State::CsiEntry | State::DcsEntry | State::Escape => {
153 self.perform_action(performer, Action::Clear, byte);
154 }
155 State::DcsPassthrough => {
156 self.perform_action(performer, Action::Hook, byte);
157 }
158 State::OscString => {
159 self.perform_action(performer, Action::OscStart, byte);
160 }
161 _ => (),
162 }
163
164 // Assume the new state
165 self.state = state;
166 }
167 }
168 }
169
170 /// Separate method for `osc_dispatch` that borrows self as read-only
171 ///
172 /// The aliasing is needed here for multiple slices into `self.osc_raw`
173 #[inline]
174 fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
175 let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
176 unsafe { MaybeUninit::uninit().assume_init() };
177
178 for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
179 let indices = self.osc_params[i];
180 *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
181 }
182
183 unsafe {
184 let num_params = self.osc_num_params;
185 let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
186 performer.osc_dispatch(&*params, byte == 0x07);
187 }
188 }
189
190 #[inline]
191 fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
192 match action {
193 Action::Print => performer.print(byte as char),
194 Action::Execute => performer.execute(byte),
195 Action::Hook => {
196 if self.params.is_full() {
197 self.ignoring = true;
198 } else {
199 self.params.push(self.param);
200 }
201
202 performer.hook(self.params(), self.intermediates(), self.ignoring, byte);
203 }
204 Action::Put => performer.put(byte),
205 Action::OscStart => {
206 self.osc_raw.clear();
207 self.osc_num_params = 0;
208 }
209 Action::OscPut => {
210 #[cfg(feature = "core")]
211 {
212 if self.osc_raw.is_full() {
213 return;
214 }
215 }
216
217 let idx = self.osc_raw.len();
218
219 // Param separator
220 if byte == b';' {
221 let param_idx = self.osc_num_params;
222 match param_idx {
223 // Only process up to MAX_OSC_PARAMS
224 MAX_OSC_PARAMS => return,
225
226 // First param is special - 0 to current byte index
227 0 => {
228 self.osc_params[param_idx] = (0, idx);
229 }
230
231 // All other params depend on previous indexing
232 _ => {
233 let prev = self.osc_params[param_idx - 1];
234 let begin = prev.1;
235 self.osc_params[param_idx] = (begin, idx);
236 }
237 }
238
239 self.osc_num_params += 1;
240 } else {
241 self.osc_raw.push(byte);
242 }
243 }
244 Action::OscEnd => {
245 let param_idx = self.osc_num_params;
246 let idx = self.osc_raw.len();
247
248 match param_idx {
249 // Finish last parameter if not already maxed
250 MAX_OSC_PARAMS => (),
251
252 // First param is special - 0 to current byte index
253 0 => {
254 self.osc_params[param_idx] = (0, idx);
255 self.osc_num_params += 1;
256 }
257
258 // All other params depend on previous indexing
259 _ => {
260 let prev = self.osc_params[param_idx - 1];
261 let begin = prev.1;
262 self.osc_params[param_idx] = (begin, idx);
263 self.osc_num_params += 1;
264 }
265 }
266 self.osc_dispatch(performer, byte);
267 }
268 Action::Unhook => performer.unhook(),
269 Action::CsiDispatch => {
270 if self.params.is_full() {
271 self.ignoring = true;
272 } else {
273 self.params.push(self.param);
274 }
275
276 performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte);
277 }
278 Action::EscDispatch => {
279 performer.esc_dispatch(self.intermediates(), self.ignoring, byte);
280 }
281 Action::Collect => {
282 if self.intermediate_idx == MAX_INTERMEDIATES {
283 self.ignoring = true;
284 } else {
285 self.intermediates[self.intermediate_idx] = byte;
286 self.intermediate_idx += 1;
287 }
288 }
289 Action::Param => {
290 if self.params.is_full() {
291 self.ignoring = true;
292 return;
293 }
294
295 if byte == b';' {
296 self.params.push(self.param);
297 self.param = 0;
298 } else if byte == b':' {
299 self.params.extend(self.param);
300 self.param = 0;
301 } else {
302 // Continue collecting bytes into param
303 self.param = self.param.saturating_mul(10);
304 self.param = self.param.saturating_add((byte - b'0') as u16);
305 }
306 }
307 Action::Clear => {
308 // Reset everything on ESC/CSI/DCS entry
309 self.intermediate_idx = 0;
310 self.ignoring = false;
311 self.param = 0;
312
313 self.params.clear();
314 }
315 Action::BeginUtf8 => self.process_utf8(performer, byte),
316 Action::Ignore => (),
317 Action::Nop => (),
318 }
319 }
320}
321
/// Assembles a `char` from a stream of bytes
pub trait CharAccumulator: Default {
    /// Feed the next `byte` into the accumulator
    ///
    /// Returns the finished `char` once one is available, or `None` while more data is
    /// needed.
    fn add(&mut self, byte: u8) -> Option<char>;
}
329
330/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
331#[cfg(feature = "utf8")]
332pub type DefaultCharAccumulator = Utf8Parser;
333#[cfg(not(feature = "utf8"))]
334pub type DefaultCharAccumulator = AsciiParser;
335
336/// Only allow parsing 7-bit ASCII
337#[allow(clippy::exhaustive_structs)]
338#[derive(Default, Clone, Debug, PartialEq, Eq)]
339pub struct AsciiParser;
340
341impl CharAccumulator for AsciiParser {
342 fn add(&mut self, _byte: u8) -> Option<char> {
343 unreachable!("multi-byte UTF8 characters are unsupported")
344 }
345}
346
/// [`CharAccumulator`] that decodes UTF-8
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Incremental decoder that is advanced one byte at a time
    utf8_parser: utf8::Parser,
}

#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Advance the decoder by `byte`
    ///
    /// Returns whatever the decoder reported for this byte through [`VtUtf8Receiver`]: a
    /// decoded character, the replacement character for an invalid sequence, or `None` when
    /// nothing was reported.
    fn add(&mut self, byte: u8) -> Option<char> {
        // The receiver writes into `c` through a mutable borrow that ends with `advance`.
        let mut c: Option<char> = None;
        let mut receiver: VtUtf8Receiver<'_> = VtUtf8Receiver(&mut c);
        self.utf8_parser.advance(&mut receiver, byte);
        c
    }
}
363
/// Receiver that stores what the UTF-8 decoder reports into the borrowed `Option<char>`
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);

#[cfg(feature = "utf8")]
impl VtUtf8Receiver<'_> {
    /// Overwrite the borrowed slot with `decoded`
    fn store(&mut self, decoded: char) {
        *self.0 = Some(decoded);
    }
}

#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    fn codepoint(&mut self, c: char) {
        self.store(c);
    }

    fn invalid_sequence(&mut self) {
        // An invalid sequence is reported as U+FFFD REPLACEMENT CHARACTER
        self.store('�');
    }
}
377
378/// Performs actions requested by the [`Parser`]
379///
380/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
381/// movement, or simply printing characters to the screen.
382///
383/// The methods on this type correspond to actions described in
384/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
385/// a useful way in my own words for completeness, but the site should be
386/// referenced if something isn't clear. If the site disappears at some point in
387/// the future, consider checking archive.org.
388pub trait Perform {
389 /// Draw a character to the screen and update states.
390 fn print(&mut self, _c: char) {}
391
392 /// Execute a C0 or C1 control function.
393 fn execute(&mut self, _byte: u8) {}
394
395 /// Invoked when a final character arrives in first part of device control string.
396 ///
397 /// The control function should be determined from the private marker, final character, and
398 /// execute with a parameter list. A handler should be selected for remaining characters in the
399 /// string; the handler function should subsequently be called by `put` for every character in
400 /// the control string.
401 ///
402 /// The `ignore` flag indicates that more than two intermediates arrived and
403 /// subsequent characters were ignored.
404 fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}
405
406 /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
407 /// will also be passed to the handler.
408 fn put(&mut self, _byte: u8) {}
409
410 /// Called when a device control string is terminated.
411 ///
412 /// The previously selected handler should be notified that the DCS has
413 /// terminated.
414 fn unhook(&mut self) {}
415
416 /// Dispatch an operating system command.
417 fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}
418
419 /// A final character has arrived for a CSI sequence
420 ///
421 /// The `ignore` flag indicates that either more than two intermediates arrived
422 /// or the number of parameters exceeded the maximum supported length,
423 /// and subsequent characters were ignored.
424 fn csi_dispatch(
425 &mut self,
426 _params: &Params,
427 _intermediates: &[u8],
428 _ignore: bool,
429 _action: u8,
430 ) {
431 }
432
433 /// The final character of an escape sequence has arrived.
434 ///
435 /// The `ignore` flag indicates that more than two intermediates arrived and
436 /// subsequent characters were ignored.
437 fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
438}
439