1 | //! Parser for implementing virtual terminal emulators |
2 | //! |
3 | //! [`Parser`] is implemented according to [Paul Williams' ANSI parser |
4 | //! state machine]. The state machine doesn't assign meaning to the parsed data |
5 | //! and is thus not itself sufficient for writing a terminal emulator. Instead, |
6 | //! it is expected that an implementation of [`Perform`] is provided which does |
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9 | //! |
10 | //! # Examples |
11 | //! |
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. A quick way to see it in action is to
//! pipe `vim` into it:
15 | //! |
16 | //! ```sh |
17 | //! cargo build --release --example parselog |
18 | //! vim | target/release/examples/parselog |
19 | //! ``` |
20 | //! |
21 | //! Just type `:q` to exit. |
22 | //! |
23 | //! # Differences from original state machine description |
24 | //! |
25 | //! * UTF-8 Support for Input |
26 | //! * OSC Strings can be terminated by 0x07 |
27 | //! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in |
28 | //! all states. |
29 | //! |
30 | //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser |
31 | #![cfg_attr (not(test), no_std)] |
32 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
33 | #![allow (missing_docs)] |
34 | #![warn (clippy::print_stderr)] |
35 | #![warn (clippy::print_stdout)] |
36 | |
37 | #[cfg (not(feature = "core" ))] |
38 | extern crate alloc; |
39 | |
40 | use core::mem::MaybeUninit; |
41 | |
42 | #[cfg (feature = "core" )] |
43 | use arrayvec::ArrayVec; |
44 | #[cfg (feature = "utf8" )] |
45 | use utf8parse as utf8; |
46 | |
47 | mod params; |
48 | pub mod state; |
49 | |
50 | pub use params::{Params, ParamsIter}; |
51 | |
52 | use state::{state_change, Action, State}; |
53 | |
/// Capacity of the intermediate-byte buffer kept by [`Parser`]; once it is
/// full, further intermediates mark the sequence as ignored.
const MAX_INTERMEDIATES: usize = 2;
/// Maximum number of OSC parameters whose boundaries are tracked.
const MAX_OSC_PARAMS: usize = 16;
/// Capacity of the fixed-size raw OSC buffer used with the `core` feature.
#[cfg(feature = "core")]
const MAX_OSC_RAW: usize = 1024;
58 | |
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// `C` is the [`CharAccumulator`] used to assemble multi-byte characters.
#[allow(unused_qualifications)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Parser<C = DefaultCharAccumulator> {
    /// State the state machine is currently in.
    state: State,
    /// Intermediate bytes collected for the sequence being parsed.
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of valid bytes at the front of `intermediates`.
    intermediate_idx: usize,
    /// Parameters completed so far for the sequence being parsed.
    params: Params,
    /// Numeric parameter currently being accumulated digit by digit.
    param: u16,
    /// Raw OSC payload bytes (`;` separators are not stored), fixed capacity.
    #[cfg(feature = "core")]
    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
    /// Raw OSC payload bytes (`;` separators are not stored), heap allocated.
    #[cfg(not(feature = "core"))]
    osc_raw: alloc::vec::Vec<u8>,
    /// `(start, end)` byte ranges into `osc_raw`, one per OSC parameter.
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
    /// Number of valid entries at the front of `osc_params`.
    osc_num_params: usize,
    /// Set when intermediates or parameters overflowed their storage.
    ignoring: bool,
    /// Accumulator that turns incoming bytes into `char`s.
    utf8_parser: C,
}
77 | |
78 | impl<C> Parser<C> |
79 | where |
80 | C: CharAccumulator, |
81 | { |
82 | /// Create a new Parser |
83 | pub fn new() -> Parser { |
84 | Parser::default() |
85 | } |
86 | |
/// Parameters collected so far for the sequence being parsed.
#[inline]
fn params(&self) -> &Params {
    &self.params
}
91 | |
92 | #[inline ] |
93 | fn intermediates(&self) -> &[u8] { |
94 | &self.intermediates[..self.intermediate_idx] |
95 | } |
96 | |
97 | /// Advance the parser state |
98 | /// |
99 | /// Requires a [`Perform`] in case `byte` triggers an action |
100 | #[inline ] |
101 | pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
102 | // Utf8 characters are handled out-of-band. |
103 | if let State::Utf8 = self.state { |
104 | self.process_utf8(performer, byte); |
105 | return; |
106 | } |
107 | |
108 | let (state, action) = state_change(self.state, byte); |
109 | self.perform_state_change(performer, state, action, byte); |
110 | } |
111 | |
112 | #[inline ] |
113 | fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) |
114 | where |
115 | P: Perform, |
116 | { |
117 | if let Some(c) = self.utf8_parser.add(byte) { |
118 | performer.print(c); |
119 | self.state = State::Ground; |
120 | } |
121 | } |
122 | |
123 | #[inline ] |
124 | fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) |
125 | where |
126 | P: Perform, |
127 | { |
128 | match state { |
129 | State::Anywhere => { |
130 | // Just run the action |
131 | self.perform_action(performer, action, byte); |
132 | } |
133 | state => { |
134 | match self.state { |
135 | State::DcsPassthrough => { |
136 | self.perform_action(performer, Action::Unhook, byte); |
137 | } |
138 | State::OscString => { |
139 | self.perform_action(performer, Action::OscEnd, byte); |
140 | } |
141 | _ => (), |
142 | } |
143 | |
144 | match action { |
145 | Action::Nop => (), |
146 | action => { |
147 | self.perform_action(performer, action, byte); |
148 | } |
149 | } |
150 | |
151 | match state { |
152 | State::CsiEntry | State::DcsEntry | State::Escape => { |
153 | self.perform_action(performer, Action::Clear, byte); |
154 | } |
155 | State::DcsPassthrough => { |
156 | self.perform_action(performer, Action::Hook, byte); |
157 | } |
158 | State::OscString => { |
159 | self.perform_action(performer, Action::OscStart, byte); |
160 | } |
161 | _ => (), |
162 | } |
163 | |
164 | // Assume the new state |
165 | self.state = state; |
166 | } |
167 | } |
168 | } |
169 | |
170 | /// Separate method for `osc_dispatch` that borrows self as read-only |
171 | /// |
172 | /// The aliasing is needed here for multiple slices into `self.osc_raw` |
173 | #[inline ] |
174 | fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { |
175 | let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = |
176 | unsafe { MaybeUninit::uninit().assume_init() }; |
177 | |
178 | for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { |
179 | let indices = self.osc_params[i]; |
180 | *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); |
181 | } |
182 | |
183 | unsafe { |
184 | let num_params = self.osc_num_params; |
185 | let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; |
186 | performer.osc_dispatch(&*params, byte == 0x07); |
187 | } |
188 | } |
189 | |
190 | #[inline ] |
191 | fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { |
192 | match action { |
193 | Action::Print => performer.print(byte as char), |
194 | Action::Execute => performer.execute(byte), |
195 | Action::Hook => { |
196 | if self.params.is_full() { |
197 | self.ignoring = true; |
198 | } else { |
199 | self.params.push(self.param); |
200 | } |
201 | |
202 | performer.hook(self.params(), self.intermediates(), self.ignoring, byte); |
203 | } |
204 | Action::Put => performer.put(byte), |
205 | Action::OscStart => { |
206 | self.osc_raw.clear(); |
207 | self.osc_num_params = 0; |
208 | } |
209 | Action::OscPut => { |
210 | #[cfg (feature = "core" )] |
211 | { |
212 | if self.osc_raw.is_full() { |
213 | return; |
214 | } |
215 | } |
216 | |
217 | let idx = self.osc_raw.len(); |
218 | |
219 | // Param separator |
220 | if byte == b';' { |
221 | let param_idx = self.osc_num_params; |
222 | match param_idx { |
223 | // Only process up to MAX_OSC_PARAMS |
224 | MAX_OSC_PARAMS => return, |
225 | |
226 | // First param is special - 0 to current byte index |
227 | 0 => { |
228 | self.osc_params[param_idx] = (0, idx); |
229 | } |
230 | |
231 | // All other params depend on previous indexing |
232 | _ => { |
233 | let prev = self.osc_params[param_idx - 1]; |
234 | let begin = prev.1; |
235 | self.osc_params[param_idx] = (begin, idx); |
236 | } |
237 | } |
238 | |
239 | self.osc_num_params += 1; |
240 | } else { |
241 | self.osc_raw.push(byte); |
242 | } |
243 | } |
244 | Action::OscEnd => { |
245 | let param_idx = self.osc_num_params; |
246 | let idx = self.osc_raw.len(); |
247 | |
248 | match param_idx { |
249 | // Finish last parameter if not already maxed |
250 | MAX_OSC_PARAMS => (), |
251 | |
252 | // First param is special - 0 to current byte index |
253 | 0 => { |
254 | self.osc_params[param_idx] = (0, idx); |
255 | self.osc_num_params += 1; |
256 | } |
257 | |
258 | // All other params depend on previous indexing |
259 | _ => { |
260 | let prev = self.osc_params[param_idx - 1]; |
261 | let begin = prev.1; |
262 | self.osc_params[param_idx] = (begin, idx); |
263 | self.osc_num_params += 1; |
264 | } |
265 | } |
266 | self.osc_dispatch(performer, byte); |
267 | } |
268 | Action::Unhook => performer.unhook(), |
269 | Action::CsiDispatch => { |
270 | if self.params.is_full() { |
271 | self.ignoring = true; |
272 | } else { |
273 | self.params.push(self.param); |
274 | } |
275 | |
276 | performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); |
277 | } |
278 | Action::EscDispatch => { |
279 | performer.esc_dispatch(self.intermediates(), self.ignoring, byte); |
280 | } |
281 | Action::Collect => { |
282 | if self.intermediate_idx == MAX_INTERMEDIATES { |
283 | self.ignoring = true; |
284 | } else { |
285 | self.intermediates[self.intermediate_idx] = byte; |
286 | self.intermediate_idx += 1; |
287 | } |
288 | } |
289 | Action::Param => { |
290 | if self.params.is_full() { |
291 | self.ignoring = true; |
292 | return; |
293 | } |
294 | |
295 | if byte == b';' { |
296 | self.params.push(self.param); |
297 | self.param = 0; |
298 | } else if byte == b':' { |
299 | self.params.extend(self.param); |
300 | self.param = 0; |
301 | } else { |
302 | // Continue collecting bytes into param |
303 | self.param = self.param.saturating_mul(10); |
304 | self.param = self.param.saturating_add((byte - b'0' ) as u16); |
305 | } |
306 | } |
307 | Action::Clear => { |
308 | // Reset everything on ESC/CSI/DCS entry |
309 | self.intermediate_idx = 0; |
310 | self.ignoring = false; |
311 | self.param = 0; |
312 | |
313 | self.params.clear(); |
314 | } |
315 | Action::BeginUtf8 => self.process_utf8(performer, byte), |
316 | Action::Ignore => (), |
317 | Action::Nop => (), |
318 | } |
319 | } |
320 | } |
321 | |
/// Build a `char` out of bytes
///
/// [`Parser`] feeds an implementation one byte at a time and prints the
/// resulting `char` as soon as one is returned.
pub trait CharAccumulator: Default {
    /// Feed the next byte of a character
    ///
    /// Return `Some` once `byte` completes a `char`, `None` when more data is needed
    fn add(&mut self, byte: u8) -> Option<char>;
}
329 | |
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// With the `utf8` feature enabled this is [`Utf8Parser`].
#[cfg(feature = "utf8")]
pub type DefaultCharAccumulator = Utf8Parser;
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// Without the `utf8` feature this is [`AsciiParser`].
#[cfg(not(feature = "utf8"))]
pub type DefaultCharAccumulator = AsciiParser;
335 | |
/// Only allow parsing 7-bit ASCII
///
/// This accumulator carries no state; see its [`CharAccumulator`]
/// implementation for what happens when it is asked to accumulate a byte.
#[allow(clippy::exhaustive_structs)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;
340 | |
impl CharAccumulator for AsciiParser {
    /// Never accumulates anything.
    ///
    /// # Panics
    ///
    /// Always panics: this accumulator is not expected to be handed any
    /// bytes, since multi-byte characters are unsupported.
    fn add(&mut self, _byte: u8) -> Option<char> {
        unreachable!("multi-byte UTF8 characters are unsupported")
    }
}
346 | |
/// Allow parsing UTF-8
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Underlying byte-at-a-time UTF-8 decoder.
    utf8_parser: utf8::Parser,
}
353 | |
#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Feed `byte` to the underlying UTF-8 decoder.
    ///
    /// Returns whatever the decoder reported through [`VtUtf8Receiver`] for
    /// this byte, or `None` when it reported nothing.
    fn add(&mut self, byte: u8) -> Option<char> {
        // The slot type must be spelled `Option<char>`: a bare `Option`
        // annotation is rejected by the compiler (missing generics).
        let mut c: Option<char> = None;
        let mut receiver = VtUtf8Receiver(&mut c);
        self.utf8_parser.advance(&mut receiver, byte);
        c
    }
}
363 | |
/// Receiver that stores what the UTF-8 decoder reports into a borrowed slot.
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);
366 | |
#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    /// Store a successfully decoded character in the slot.
    fn codepoint(&mut self, c: char) {
        *self.0 = Some(c);
    }

    /// Store the replacement character in the slot for an invalid sequence.
    fn invalid_sequence(&mut self) {
        *self.0 = Some('�');
    }
}
377 | |
/// Performs actions requested by the [`Parser`]
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <https://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
///
/// Every method has an empty default implementation, so implementors only
/// need to override the callbacks they care about.
pub trait Perform {
    /// Draw a character to the screen and update states.
    fn print(&mut self, _c: char) {}

    /// Execute a C0 or C1 control function.
    fn execute(&mut self, _byte: u8) {}

    /// Invoked when a final character arrives in first part of device control string.
    ///
    /// The control function should be determined from the private marker, final character, and
    /// execute with a parameter list. A handler should be selected for remaining characters in the
    /// string; the handler function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}

    /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
    /// will also be passed to the handler.
    fn put(&mut self, _byte: u8) {}

    /// Called when a device control string is terminated.
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self) {}

    /// Dispatch an operating system command.
    ///
    /// `_params` holds the `;`-separated parameters of the command, and
    /// `_bell_terminated` is `true` when the command was ended by byte `0x07`.
    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}

    /// A final character has arrived for a CSI sequence
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn csi_dispatch(
        &mut self,
        _params: &Params,
        _intermediates: &[u8],
        _ignore: bool,
        _action: u8,
    ) {
    }

    /// The final character of an escape sequence has arrived.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}
439 | |