1 | //! Minimal, flexible command-line parser |
2 | //! |
3 | //! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing |
4 | //! a command-line is not context-free, we rely on the caller to decide how to interpret the |
5 | //! arguments. |
6 | //! |
7 | //! # Examples |
8 | //! |
9 | //! ```rust |
10 | //! use std::path::PathBuf; |
11 | //! use std::ffi::OsStr; |
12 | //! |
13 | //! type BoxedError = Box<dyn std::error::Error + Send + Sync>; |
14 | //! |
15 | //! #[derive(Debug)] |
16 | //! struct Args { |
17 | //! paths: Vec<PathBuf>, |
18 | //! color: Color, |
19 | //! verbosity: usize, |
20 | //! } |
21 | //! |
22 | //! #[derive(Debug)] |
23 | //! enum Color { |
24 | //! Always, |
25 | //! Auto, |
26 | //! Never, |
27 | //! } |
28 | //! |
29 | //! impl Color { |
30 | //! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> { |
31 | //! let s = s.map(|s| s.to_str().ok_or(s)); |
32 | //! match s { |
33 | //! Some(Ok("always" )) | Some(Ok("" )) | None => { |
34 | //! Ok(Color::Always) |
35 | //! } |
36 | //! Some(Ok("auto" )) => { |
37 | //! Ok(Color::Auto) |
38 | //! } |
39 | //! Some(Ok("never" )) => { |
40 | //! Ok(Color::Never) |
41 | //! } |
42 | //! Some(invalid) => { |
43 | //! Err(format!("Invalid value for `--color`, {invalid:?}" ).into()) |
44 | //! } |
45 | //! } |
46 | //! } |
47 | //! } |
48 | //! |
49 | //! fn parse_args( |
50 | //! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>> |
51 | //! ) -> Result<Args, BoxedError> { |
52 | //! let mut args = Args { |
53 | //! paths: Vec::new(), |
54 | //! color: Color::Auto, |
55 | //! verbosity: 0, |
56 | //! }; |
57 | //! |
58 | //! let raw = clap_lex::RawArgs::new(raw); |
59 | //! let mut cursor = raw.cursor(); |
60 | //! raw.next(&mut cursor); // Skip the bin |
61 | //! while let Some(arg) = raw.next(&mut cursor) { |
62 | //! if arg.is_escape() { |
63 | //! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from)); |
64 | //! } else if arg.is_stdio() { |
65 | //! args.paths.push(PathBuf::from("-" )); |
66 | //! } else if let Some((long, value)) = arg.to_long() { |
67 | //! match long { |
68 | //! Ok("verbose" ) => { |
69 | //! if let Some(value) = value { |
70 | //! return Err(format!("`--verbose` does not take a value, got `{value:?}`" ).into()); |
71 | //! } |
72 | //! args.verbosity += 1; |
73 | //! } |
74 | //! Ok("color" ) => { |
75 | //! args.color = Color::parse(value)?; |
76 | //! } |
77 | //! _ => { |
78 | //! return Err( |
//!                     format!("Unexpected flag: {}", arg.display()).into()
80 | //! ); |
81 | //! } |
82 | //! } |
83 | //! } else if let Some(mut shorts) = arg.to_short() { |
84 | //! while let Some(short) = shorts.next_flag() { |
85 | //! match short { |
86 | //! Ok('v' ) => { |
87 | //! args.verbosity += 1; |
88 | //! } |
89 | //! Ok('c' ) => { |
90 | //! let value = shorts.next_value_os(); |
91 | //! args.color = Color::parse(value)?; |
92 | //! } |
93 | //! Ok(c) => { |
94 | //! return Err(format!("Unexpected flag: -{c}" ).into()); |
95 | //! } |
96 | //! Err(e) => { |
97 | //! return Err(format!("Unexpected flag: -{}" , e.to_string_lossy()).into()); |
98 | //! } |
99 | //! } |
100 | //! } |
101 | //! } else { |
102 | //! args.paths.push(PathBuf::from(arg.to_value_os().to_owned())); |
103 | //! } |
104 | //! } |
105 | //! |
106 | //! Ok(args) |
107 | //! } |
108 | //! |
109 | //! let args = parse_args(["bin" , "--hello" , "world" ]); |
110 | //! println!("{args:?}" ); |
111 | //! ``` |
112 | |
113 | mod ext; |
114 | |
115 | use std::ffi::OsStr; |
116 | use std::ffi::OsString; |
117 | |
118 | pub use std::io::SeekFrom; |
119 | |
120 | pub use ext::OsStrExt; |
121 | |
/// Command-line arguments
///
/// This is an owned list of raw arguments. It stores no position of its own; create an
/// [`ArgCursor`] with [`RawArgs::cursor`] and pass it to the accessor methods to walk the list.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct RawArgs {
    /// Arguments, in the order they were supplied
    items: Vec<OsString>,
}
127 | |
128 | impl RawArgs { |
129 | //// Create an argument list to parse |
130 | /// |
131 | /// **NOTE:** The argument returned will be the current binary. |
132 | /// |
133 | /// # Example |
134 | /// |
135 | /// ```rust,no_run |
136 | /// # use std::path::PathBuf; |
137 | /// let raw = clap_lex::RawArgs::from_args(); |
138 | /// let mut cursor = raw.cursor(); |
139 | /// let _bin = raw.next_os(&mut cursor); |
140 | /// |
141 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
142 | /// println!("{paths:?}" ); |
143 | /// ``` |
144 | pub fn from_args() -> Self { |
145 | Self::new(std::env::args_os()) |
146 | } |
147 | |
148 | //// Create an argument list to parse |
149 | /// |
150 | /// # Example |
151 | /// |
152 | /// ```rust,no_run |
153 | /// # use std::path::PathBuf; |
154 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
155 | /// let mut cursor = raw.cursor(); |
156 | /// let _bin = raw.next_os(&mut cursor); |
157 | /// |
158 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
159 | /// println!("{paths:?}" ); |
160 | /// ``` |
161 | pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self { |
162 | let iter = iter.into_iter(); |
163 | Self::from(iter) |
164 | } |
165 | |
166 | /// Create a cursor for walking the arguments |
167 | /// |
168 | /// # Example |
169 | /// |
170 | /// ```rust,no_run |
171 | /// # use std::path::PathBuf; |
172 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
173 | /// let mut cursor = raw.cursor(); |
174 | /// let _bin = raw.next_os(&mut cursor); |
175 | /// |
176 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
177 | /// println!("{paths:?}" ); |
178 | /// ``` |
179 | pub fn cursor(&self) -> ArgCursor { |
180 | ArgCursor::new() |
181 | } |
182 | |
183 | /// Advance the cursor, returning the next [`ParsedArg`] |
184 | pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> { |
185 | self.next_os(cursor).map(ParsedArg::new) |
186 | } |
187 | |
188 | /// Advance the cursor, returning a raw argument value. |
189 | pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> { |
190 | let next = self.items.get(cursor.cursor).map(|s| s.as_os_str()); |
191 | cursor.cursor = cursor.cursor.saturating_add(1); |
192 | next |
193 | } |
194 | |
195 | /// Return the next [`ParsedArg`] |
196 | pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> { |
197 | self.peek_os(cursor).map(ParsedArg::new) |
198 | } |
199 | |
200 | /// Return a raw argument value. |
201 | pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> { |
202 | self.items.get(cursor.cursor).map(|s| s.as_os_str()) |
203 | } |
204 | |
205 | /// Return all remaining raw arguments, advancing the cursor to the end |
206 | /// |
207 | /// # Example |
208 | /// |
209 | /// ```rust,no_run |
210 | /// # use std::path::PathBuf; |
211 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
212 | /// let mut cursor = raw.cursor(); |
213 | /// let _bin = raw.next_os(&mut cursor); |
214 | /// |
215 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
216 | /// println!("{paths:?}" ); |
217 | /// ``` |
218 | pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> { |
219 | let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str()); |
220 | cursor.cursor = self.items.len(); |
221 | remaining |
222 | } |
223 | |
224 | /// Adjust the cursor's position |
225 | pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) { |
226 | let pos = match pos { |
227 | SeekFrom::Start(pos) => pos, |
228 | SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64, |
229 | SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64, |
230 | }; |
231 | let pos = (pos as usize).min(self.items.len()); |
232 | cursor.cursor = pos; |
233 | } |
234 | |
235 | /// Inject arguments before the [`RawArgs::next`] |
236 | pub fn insert( |
237 | &mut self, |
238 | cursor: &ArgCursor, |
239 | insert_items: impl IntoIterator<Item = impl Into<OsString>>, |
240 | ) { |
241 | self.items.splice( |
242 | cursor.cursor..cursor.cursor, |
243 | insert_items.into_iter().map(Into::into), |
244 | ); |
245 | } |
246 | |
247 | /// Any remaining args? |
248 | pub fn is_end(&self, cursor: &ArgCursor) -> bool { |
249 | self.peek_os(cursor).is_none() |
250 | } |
251 | } |
252 | |
253 | impl<I, T> From<I> for RawArgs |
254 | where |
255 | I: Iterator<Item = T>, |
256 | T: Into<OsString>, |
257 | { |
258 | fn from(val: I) -> Self { |
259 | Self { |
260 | items: val.map(|x: T| x.into()).collect(), |
261 | } |
262 | } |
263 | } |
264 | |
/// Position within [`RawArgs`]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    /// Index of the argument the next read will return
    cursor: usize,
}

impl ArgCursor {
    /// Create a cursor positioned at index `0`
    fn new() -> Self {
        let cursor = 0;
        Self { cursor }
    }
}
276 | |
/// Command-line Argument
///
/// A borrowed view of one raw argument; the text is not copied.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParsedArg<'s> {
    /// The raw argument text
    inner: &'s OsStr,
}
282 | |
283 | impl<'s> ParsedArg<'s> { |
284 | fn new(inner: &'s OsStr) -> Self { |
285 | Self { inner } |
286 | } |
287 | |
288 | /// Argument is length of 0 |
289 | pub fn is_empty(&self) -> bool { |
290 | self.inner.is_empty() |
291 | } |
292 | |
293 | /// Does the argument look like a stdio argument (`-`) |
294 | pub fn is_stdio(&self) -> bool { |
295 | self.inner == "-" |
296 | } |
297 | |
298 | /// Does the argument look like an argument escape (`--`) |
299 | pub fn is_escape(&self) -> bool { |
300 | self.inner == "--" |
301 | } |
302 | |
303 | /// Does the argument look like a negative number? |
304 | /// |
305 | /// This won't parse the number in full but attempts to see if this looks |
306 | /// like something along the lines of `-3`, `-0.3`, or `-33.03` |
307 | pub fn is_negative_number(&self) -> bool { |
308 | self.to_value() |
309 | .ok() |
310 | .and_then(|s| Some(is_number(s.strip_prefix('-' )?))) |
311 | .unwrap_or_default() |
312 | } |
313 | |
314 | /// Treat as a long-flag |
315 | pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> { |
316 | let raw = self.inner; |
317 | let remainder = raw.strip_prefix("--" )?; |
318 | if remainder.is_empty() { |
319 | debug_assert!(self.is_escape()); |
320 | return None; |
321 | } |
322 | |
323 | let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=" ) { |
324 | (p0, Some(p1)) |
325 | } else { |
326 | (remainder, None) |
327 | }; |
328 | let flag = flag.to_str().ok_or(flag); |
329 | Some((flag, value)) |
330 | } |
331 | |
332 | /// Can treat as a long-flag |
333 | pub fn is_long(&self) -> bool { |
334 | self.inner.starts_with("--" ) && !self.is_escape() |
335 | } |
336 | |
337 | /// Treat as a short-flag |
338 | pub fn to_short(&self) -> Option<ShortFlags<'_>> { |
339 | if let Some(remainder_os) = self.inner.strip_prefix("-" ) { |
340 | if remainder_os.starts_with("-" ) { |
341 | None |
342 | } else if remainder_os.is_empty() { |
343 | debug_assert!(self.is_stdio()); |
344 | None |
345 | } else { |
346 | Some(ShortFlags::new(remainder_os)) |
347 | } |
348 | } else { |
349 | None |
350 | } |
351 | } |
352 | |
353 | /// Can treat as a short-flag |
354 | pub fn is_short(&self) -> bool { |
355 | self.inner.starts_with("-" ) && !self.is_stdio() && !self.inner.starts_with("--" ) |
356 | } |
357 | |
358 | /// Treat as a value |
359 | /// |
360 | /// **NOTE:** May return a flag or an escape. |
361 | pub fn to_value_os(&self) -> &OsStr { |
362 | self.inner |
363 | } |
364 | |
365 | /// Treat as a value |
366 | /// |
367 | /// **NOTE:** May return a flag or an escape. |
368 | pub fn to_value(&self) -> Result<&str, &OsStr> { |
369 | self.inner.to_str().ok_or(self.inner) |
370 | } |
371 | |
372 | /// Safely print an argument that may contain non-UTF8 content |
373 | /// |
374 | /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead. |
375 | pub fn display(&self) -> impl std::fmt::Display + '_ { |
376 | self.inner.to_string_lossy() |
377 | } |
378 | } |
379 | |
/// Walk through short flags within a [`ParsedArg`]
#[derive(Debug, Clone)]
pub struct ShortFlags<'s> {
    /// The complete text being walked
    inner: &'s OsStr,
    /// Not-yet-consumed characters of the leading valid UTF-8 part of `inner`
    utf8_prefix: std::str::CharIndices<'s>,
    /// Part of `inner` following the valid UTF-8 prefix, if any and not yet consumed
    invalid_suffix: Option<&'s OsStr>,
}
387 | |
388 | impl<'s> ShortFlags<'s> { |
389 | fn new(inner: &'s OsStr) -> Self { |
390 | let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner); |
391 | let utf8_prefix = utf8_prefix.char_indices(); |
392 | Self { |
393 | inner, |
394 | utf8_prefix, |
395 | invalid_suffix, |
396 | } |
397 | } |
398 | |
399 | /// Move the iterator forward by `n` short flags |
400 | pub fn advance_by(&mut self, n: usize) -> Result<(), usize> { |
401 | for i in 0..n { |
402 | self.next().ok_or(i)?.map_err(|_| i)?; |
403 | } |
404 | Ok(()) |
405 | } |
406 | |
407 | /// No short flags left |
408 | pub fn is_empty(&self) -> bool { |
409 | self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty() |
410 | } |
411 | |
412 | /// Does the short flag look like a number |
413 | /// |
414 | /// Ideally call this before doing any iterator |
415 | pub fn is_negative_number(&self) -> bool { |
416 | self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str()) |
417 | } |
418 | |
419 | /// Advance the iterator, returning the next short flag on success |
420 | /// |
421 | /// On error, returns the invalid-UTF8 value |
422 | pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> { |
423 | if let Some((_, flag)) = self.utf8_prefix.next() { |
424 | return Some(Ok(flag)); |
425 | } |
426 | |
427 | if let Some(suffix) = self.invalid_suffix { |
428 | self.invalid_suffix = None; |
429 | return Some(Err(suffix)); |
430 | } |
431 | |
432 | None |
433 | } |
434 | |
435 | /// Advance the iterator, returning everything left as a value |
436 | pub fn next_value_os(&mut self) -> Option<&'s OsStr> { |
437 | if let Some((index, _)) = self.utf8_prefix.next() { |
438 | self.utf8_prefix = "" .char_indices(); |
439 | self.invalid_suffix = None; |
440 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
441 | let remainder = unsafe { ext::split_at(self.inner, index).1 }; |
442 | return Some(remainder); |
443 | } |
444 | |
445 | if let Some(suffix) = self.invalid_suffix { |
446 | self.invalid_suffix = None; |
447 | return Some(suffix); |
448 | } |
449 | |
450 | None |
451 | } |
452 | } |
453 | |
454 | impl<'s> Iterator for ShortFlags<'s> { |
455 | type Item = Result<char, &'s OsStr>; |
456 | |
457 | fn next(&mut self) -> Option<Self::Item> { |
458 | self.next_flag() |
459 | } |
460 | } |
461 | |
462 | fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) { |
463 | match b.try_str() { |
464 | Ok(s: &str) => (s, None), |
465 | Err(err: Utf8Error) => { |
466 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
467 | let (valid: &OsStr, after_valid: &OsStr) = unsafe { ext::split_at(os:b, index:err.valid_up_to()) }; |
468 | let valid: &str = valid.try_str().unwrap(); |
469 | (valid, Some(after_valid)) |
470 | } |
471 | } |
472 | } |
473 | |
/// Does `arg` look like an unsigned integer or float?
///
/// Accepts digits with at most one `.` and at most one exponent marker `e`, neither of which may
/// be the first character; the `.` must come before the `e`, and the `e` must be followed by at
/// least one more character. Examples: `3`, `1.`, `1.2`, `1.2e10`.
///
/// This is a cheap "looks like" check, not a full parse. The empty string is not a number.
fn is_number(arg: &str) -> bool {
    // Without this guard the loop below never runs and the empty string would be accepted,
    // which makes a lone `-` look like a negative number to callers that strip the sign first.
    if arg.is_empty() {
        return false;
    }

    // Return true if this looks like an integer or a float where it's all
    // digits plus an optional single dot after some digits.
    //
    // For floats allow forms such as `1.`, `1.2`, `1.2e10`, etc.
    let mut seen_dot = false;
    let mut position_of_e = None;
    for (i, c) in arg.bytes().enumerate() {
        match c {
            // Digits are always valid
            b'0'..=b'9' => {}

            // Allow a `.`, but only one, only if it comes before an
            // optional exponent, and only if it's not the first character.
            b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

            // Allow an exponent `e` but only at most one after the first
            // character.
            b'e' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

            _ => return false,
        }
    }

    // Disallow `-1e` which isn't a valid float since it doesn't actually have
    // an exponent.
    match position_of_e {
        Some(i) => i + 1 != arg.len(),
        None => true,
    }
}
505 | |