1 | //! Parser for implementing virtual terminal emulators |
2 | //! |
3 | //! [`Parser`] is implemented according to [Paul Williams' ANSI parser |
4 | //! state machine]. The state machine doesn't assign meaning to the parsed data |
5 | //! and is thus not itself sufficient for writing a terminal emulator. Instead, |
6 | //! it is expected that an implementation of [`Perform`] is provided which does |
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9 | //! |
10 | //! # Examples |
11 | //! |
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick way to see it in action is to
//! pipe `vim` into it:
15 | //! |
16 | //! ```sh |
17 | //! cargo build --release --example parselog |
18 | //! vim | target/release/examples/parselog |
19 | //! ``` |
20 | //! |
21 | //! Just type `:q` to exit. |
22 | //! |
23 | //! # Differences from original state machine description |
24 | //! |
25 | //! * UTF-8 Support for Input |
26 | //! * OSC Strings can be terminated by 0x07 |
27 | //! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in |
28 | //! all states. |
29 | //! |
30 | //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser |
31 | #![cfg_attr (not(test), no_std)] |
32 | |
33 | #[cfg (not(feature = "core" ))] |
34 | extern crate alloc; |
35 | |
36 | use core::mem::MaybeUninit; |
37 | |
38 | #[cfg (feature = "core" )] |
39 | use arrayvec::ArrayVec; |
40 | #[cfg (feature = "utf8" )] |
41 | use utf8parse as utf8; |
42 | |
43 | mod params; |
44 | pub mod state; |
45 | |
46 | pub use params::{Params, ParamsIter}; |
47 | |
48 | use state::{state_change, Action, State}; |
49 | |
/// Upper bound on the intermediate bytes a [`Parser`] keeps for one sequence;
/// further intermediates only raise the parser's `ignoring` flag.
const MAX_INTERMEDIATES: usize = 2;
/// Upper bound on the number of OSC parameter ranges a [`Parser`] records.
const MAX_OSC_PARAMS: usize = 16;
/// Capacity, in bytes, of the fixed-size OSC buffer used with the `core` feature.
#[cfg(feature = "core")]
const MAX_OSC_RAW: usize = 1024;
54 | |
55 | /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] |
56 | #[derive (Default, Clone, Debug, PartialEq, Eq)] |
57 | pub struct Parser<C = DefaultCharAccumulator> { |
58 | state: State, |
59 | intermediates: [u8; MAX_INTERMEDIATES], |
60 | intermediate_idx: usize, |
61 | params: Params, |
62 | param: u16, |
63 | #[cfg (feature = "core" )] |
64 | osc_raw: ArrayVec<u8, MAX_OSC_RAW>, |
65 | #[cfg (not(feature = "core" ))] |
66 | osc_raw: alloc::vec::Vec<u8>, |
67 | osc_params: [(usize, usize); MAX_OSC_PARAMS], |
68 | osc_num_params: usize, |
69 | ignoring: bool, |
70 | utf8_parser: C, |
71 | } |
72 | |
73 | impl<C> Parser<C> |
74 | where |
75 | C: CharAccumulator, |
76 | { |
77 | /// Create a new Parser |
78 | pub fn new() -> Parser { |
79 | Parser::default() |
80 | } |
81 | |
82 | #[inline ] |
83 | fn params(&self) -> &Params { |
84 | &self.params |
85 | } |
86 | |
87 | #[inline ] |
88 | fn intermediates(&self) -> &[u8] { |
89 | &self.intermediates[..self.intermediate_idx] |
90 | } |
91 | |
92 | /// Advance the parser state |
93 | /// |
94 | /// Requires a [`Perform`] in case `byte` triggers an action |
95 | #[inline ] |
96 | pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
97 | // Utf8 characters are handled out-of-band. |
98 | if let State::Utf8 = self.state { |
99 | self.process_utf8(performer, byte); |
100 | return; |
101 | } |
102 | |
103 | let (state, action) = state_change(self.state, byte); |
104 | self.perform_state_change(performer, state, action, byte); |
105 | } |
106 | |
107 | #[inline ] |
108 | fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) |
109 | where |
110 | P: Perform, |
111 | { |
112 | if let Some(c) = self.utf8_parser.add(byte) { |
113 | performer.print(c); |
114 | self.state = State::Ground; |
115 | } |
116 | } |
117 | |
118 | #[inline ] |
119 | fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) |
120 | where |
121 | P: Perform, |
122 | { |
123 | match state { |
124 | State::Anywhere => { |
125 | // Just run the action |
126 | self.perform_action(performer, action, byte); |
127 | } |
128 | state => { |
129 | match self.state { |
130 | State::DcsPassthrough => { |
131 | self.perform_action(performer, Action::Unhook, byte); |
132 | } |
133 | State::OscString => { |
134 | self.perform_action(performer, Action::OscEnd, byte); |
135 | } |
136 | _ => (), |
137 | } |
138 | |
139 | match action { |
140 | Action::Nop => (), |
141 | action => { |
142 | self.perform_action(performer, action, byte); |
143 | } |
144 | } |
145 | |
146 | match state { |
147 | State::CsiEntry | State::DcsEntry | State::Escape => { |
148 | self.perform_action(performer, Action::Clear, byte); |
149 | } |
150 | State::DcsPassthrough => { |
151 | self.perform_action(performer, Action::Hook, byte); |
152 | } |
153 | State::OscString => { |
154 | self.perform_action(performer, Action::OscStart, byte); |
155 | } |
156 | _ => (), |
157 | } |
158 | |
159 | // Assume the new state |
160 | self.state = state; |
161 | } |
162 | } |
163 | } |
164 | |
165 | /// Separate method for osc_dispatch that borrows self as read-only |
166 | /// |
167 | /// The aliasing is needed here for multiple slices into self.osc_raw |
168 | #[inline ] |
169 | fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { |
170 | let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = |
171 | unsafe { MaybeUninit::uninit().assume_init() }; |
172 | |
173 | for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { |
174 | let indices = self.osc_params[i]; |
175 | *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); |
176 | } |
177 | |
178 | unsafe { |
179 | let num_params = self.osc_num_params; |
180 | let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; |
181 | performer.osc_dispatch(&*params, byte == 0x07); |
182 | } |
183 | } |
184 | |
185 | #[inline ] |
186 | fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { |
187 | match action { |
188 | Action::Print => performer.print(byte as char), |
189 | Action::Execute => performer.execute(byte), |
190 | Action::Hook => { |
191 | if self.params.is_full() { |
192 | self.ignoring = true; |
193 | } else { |
194 | self.params.push(self.param); |
195 | } |
196 | |
197 | performer.hook(self.params(), self.intermediates(), self.ignoring, byte); |
198 | } |
199 | Action::Put => performer.put(byte), |
200 | Action::OscStart => { |
201 | self.osc_raw.clear(); |
202 | self.osc_num_params = 0; |
203 | } |
204 | Action::OscPut => { |
205 | #[cfg (feature = "core" )] |
206 | { |
207 | if self.osc_raw.is_full() { |
208 | return; |
209 | } |
210 | } |
211 | |
212 | let idx = self.osc_raw.len(); |
213 | |
214 | // Param separator |
215 | if byte == b';' { |
216 | let param_idx = self.osc_num_params; |
217 | match param_idx { |
218 | // Only process up to MAX_OSC_PARAMS |
219 | MAX_OSC_PARAMS => return, |
220 | |
221 | // First param is special - 0 to current byte index |
222 | 0 => { |
223 | self.osc_params[param_idx] = (0, idx); |
224 | } |
225 | |
226 | // All other params depend on previous indexing |
227 | _ => { |
228 | let prev = self.osc_params[param_idx - 1]; |
229 | let begin = prev.1; |
230 | self.osc_params[param_idx] = (begin, idx); |
231 | } |
232 | } |
233 | |
234 | self.osc_num_params += 1; |
235 | } else { |
236 | self.osc_raw.push(byte); |
237 | } |
238 | } |
239 | Action::OscEnd => { |
240 | let param_idx = self.osc_num_params; |
241 | let idx = self.osc_raw.len(); |
242 | |
243 | match param_idx { |
244 | // Finish last parameter if not already maxed |
245 | MAX_OSC_PARAMS => (), |
246 | |
247 | // First param is special - 0 to current byte index |
248 | 0 => { |
249 | self.osc_params[param_idx] = (0, idx); |
250 | self.osc_num_params += 1; |
251 | } |
252 | |
253 | // All other params depend on previous indexing |
254 | _ => { |
255 | let prev = self.osc_params[param_idx - 1]; |
256 | let begin = prev.1; |
257 | self.osc_params[param_idx] = (begin, idx); |
258 | self.osc_num_params += 1; |
259 | } |
260 | } |
261 | self.osc_dispatch(performer, byte); |
262 | } |
263 | Action::Unhook => performer.unhook(), |
264 | Action::CsiDispatch => { |
265 | if self.params.is_full() { |
266 | self.ignoring = true; |
267 | } else { |
268 | self.params.push(self.param); |
269 | } |
270 | |
271 | performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); |
272 | } |
273 | Action::EscDispatch => { |
274 | performer.esc_dispatch(self.intermediates(), self.ignoring, byte); |
275 | } |
276 | Action::Collect => { |
277 | if self.intermediate_idx == MAX_INTERMEDIATES { |
278 | self.ignoring = true; |
279 | } else { |
280 | self.intermediates[self.intermediate_idx] = byte; |
281 | self.intermediate_idx += 1; |
282 | } |
283 | } |
284 | Action::Param => { |
285 | if self.params.is_full() { |
286 | self.ignoring = true; |
287 | return; |
288 | } |
289 | |
290 | if byte == b';' { |
291 | self.params.push(self.param); |
292 | self.param = 0; |
293 | } else if byte == b':' { |
294 | self.params.extend(self.param); |
295 | self.param = 0; |
296 | } else { |
297 | // Continue collecting bytes into param |
298 | self.param = self.param.saturating_mul(10); |
299 | self.param = self.param.saturating_add((byte - b'0' ) as u16); |
300 | } |
301 | } |
302 | Action::Clear => { |
303 | // Reset everything on ESC/CSI/DCS entry |
304 | self.intermediate_idx = 0; |
305 | self.ignoring = false; |
306 | self.param = 0; |
307 | |
308 | self.params.clear(); |
309 | } |
310 | Action::BeginUtf8 => self.process_utf8(performer, byte), |
311 | Action::Ignore => (), |
312 | Action::Nop => (), |
313 | } |
314 | } |
315 | } |
316 | |
/// Build a `char` out of bytes
///
/// Implementors receive input one byte at a time and report a character once
/// enough bytes have arrived. `Default` is required so a fresh accumulator can
/// be created without arguments.
pub trait CharAccumulator: Default {
    /// Consume one more `byte` of input.
    ///
    /// Returns `Some(char)` when a character is ready and `None` when more
    /// data is needed.
    fn add(&mut self, byte: u8) -> Option<char>;
}
324 | |
325 | #[cfg (feature = "utf8" )] |
326 | pub type DefaultCharAccumulator = Utf8Parser; |
327 | #[cfg (not(feature = "utf8" ))] |
328 | pub type DefaultCharAccumulator = AsciiParser; |
329 | |
/// Only allow parsing 7-bit ASCII
///
/// A stateless marker type: it carries no data of its own.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;
333 | |
334 | impl CharAccumulator for AsciiParser { |
335 | fn add(&mut self, _byte: u8) -> Option<char> { |
336 | unreachable!("multi-byte UTF8 characters are unsupported" ) |
337 | } |
338 | } |
339 | |
/// Allow parsing UTF-8
///
/// Thin wrapper around the `utf8parse` decoder; only available with the
/// `utf8` feature.
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Underlying incremental UTF-8 decoder.
    utf8_parser: utf8::Parser,
}
346 | |
#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Feed one `byte` to the wrapped UTF-8 decoder.
    ///
    /// Returns whatever the decoder reported while handling this byte: a
    /// completed character, or U+FFFD if it flagged an invalid sequence.
    /// `None` means the decoder reported nothing yet and needs more input.
    fn add(&mut self, byte: u8) -> Option<char> {
        // The slot needs its full type: a bare `Option` annotation is
        // rejected by the compiler (missing generic argument).
        let mut decoded: Option<char> = None;
        let mut receiver = VtUtf8Receiver(&mut decoded);
        self.utf8_parser.advance(&mut receiver, byte);
        decoded
    }
}
356 | |
/// Receiver that writes what the UTF-8 decoder reports into a borrowed slot.
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);

#[cfg(feature = "utf8")]
impl<'a> utf8::Receiver for VtUtf8Receiver<'a> {
    /// Store a successfully decoded character in the slot.
    fn codepoint(&mut self, decoded: char) {
        *self.0 = Some(decoded);
    }

    /// Store U+FFFD (the replacement character) in the slot.
    fn invalid_sequence(&mut self) {
        *self.0 = Some('\u{FFFD}');
    }
}
370 | |
371 | /// Performs actions requested by the [`Parser`] |
372 | /// |
373 | /// Actions in this case mean, for example, handling a CSI escape sequence describing cursor |
374 | /// movement, or simply printing characters to the screen. |
375 | /// |
376 | /// The methods on this type correspond to actions described in |
377 | /// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in |
378 | /// a useful way in my own words for completeness, but the site should be |
379 | /// referenced if something isn't clear. If the site disappears at some point in |
380 | /// the future, consider checking archive.org. |
381 | pub trait Perform { |
382 | /// Draw a character to the screen and update states. |
383 | fn print(&mut self, _c: char) {} |
384 | |
385 | /// Execute a C0 or C1 control function. |
386 | fn execute(&mut self, _byte: u8) {} |
387 | |
388 | /// Invoked when a final character arrives in first part of device control string. |
389 | /// |
390 | /// The control function should be determined from the private marker, final character, and |
391 | /// execute with a parameter list. A handler should be selected for remaining characters in the |
392 | /// string; the handler function should subsequently be called by `put` for every character in |
393 | /// the control string. |
394 | /// |
395 | /// The `ignore` flag indicates that more than two intermediates arrived and |
396 | /// subsequent characters were ignored. |
397 | fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {} |
398 | |
399 | /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls |
400 | /// will also be passed to the handler. |
401 | fn put(&mut self, _byte: u8) {} |
402 | |
403 | /// Called when a device control string is terminated. |
404 | /// |
405 | /// The previously selected handler should be notified that the DCS has |
406 | /// terminated. |
407 | fn unhook(&mut self) {} |
408 | |
409 | /// Dispatch an operating system command. |
410 | fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {} |
411 | |
412 | /// A final character has arrived for a CSI sequence |
413 | /// |
414 | /// The `ignore` flag indicates that either more than two intermediates arrived |
415 | /// or the number of parameters exceeded the maximum supported length, |
416 | /// and subsequent characters were ignored. |
417 | fn csi_dispatch( |
418 | &mut self, |
419 | _params: &Params, |
420 | _intermediates: &[u8], |
421 | _ignore: bool, |
422 | _action: u8, |
423 | ) { |
424 | } |
425 | |
426 | /// The final character of an escape sequence has arrived. |
427 | /// |
428 | /// The `ignore` flag indicates that more than two intermediates arrived and |
429 | /// subsequent characters were ignored. |
430 | fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} |
431 | } |
432 | |