1 | //! Minimal, flexible command-line parser |
2 | //! |
3 | //! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing |
4 | //! a command-line is not context-free, we rely on the caller to decide how to interpret the |
5 | //! arguments. |
6 | //! |
7 | //! # Examples |
8 | //! |
9 | //! ```rust |
10 | //! use std::path::PathBuf; |
11 | //! use std::ffi::OsStr; |
12 | //! |
13 | //! type BoxedError = Box<dyn std::error::Error + Send + Sync>; |
14 | //! |
15 | //! #[derive(Debug)] |
16 | //! struct Args { |
17 | //! paths: Vec<PathBuf>, |
18 | //! color: Color, |
19 | //! verbosity: usize, |
20 | //! } |
21 | //! |
22 | //! #[derive(Debug)] |
23 | //! enum Color { |
24 | //! Always, |
25 | //! Auto, |
26 | //! Never, |
27 | //! } |
28 | //! |
29 | //! impl Color { |
30 | //! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> { |
31 | //! let s = s.map(|s| s.to_str().ok_or(s)); |
32 | //! match s { |
33 | //! Some(Ok("always" )) | Some(Ok("" )) | None => { |
34 | //! Ok(Color::Always) |
35 | //! } |
36 | //! Some(Ok("auto" )) => { |
37 | //! Ok(Color::Auto) |
38 | //! } |
39 | //! Some(Ok("never" )) => { |
40 | //! Ok(Color::Never) |
41 | //! } |
42 | //! Some(invalid) => { |
43 | //! Err(format!("Invalid value for `--color`, {invalid:?}" ).into()) |
44 | //! } |
45 | //! } |
46 | //! } |
47 | //! } |
48 | //! |
49 | //! fn parse_args( |
50 | //! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>> |
51 | //! ) -> Result<Args, BoxedError> { |
52 | //! let mut args = Args { |
53 | //! paths: Vec::new(), |
54 | //! color: Color::Auto, |
55 | //! verbosity: 0, |
56 | //! }; |
57 | //! |
58 | //! let raw = clap_lex::RawArgs::new(raw); |
59 | //! let mut cursor = raw.cursor(); |
60 | //! raw.next(&mut cursor); // Skip the bin |
61 | //! while let Some(arg) = raw.next(&mut cursor) { |
62 | //! if arg.is_escape() { |
63 | //! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from)); |
64 | //! } else if arg.is_stdio() { |
65 | //! args.paths.push(PathBuf::from("-" )); |
66 | //! } else if let Some((long, value)) = arg.to_long() { |
67 | //! match long { |
68 | //! Ok("verbose" ) => { |
69 | //! if let Some(value) = value { |
70 | //! return Err(format!("`--verbose` does not take a value, got `{value:?}`" ).into()); |
71 | //! } |
72 | //! args.verbosity += 1; |
73 | //! } |
74 | //! Ok("color" ) => { |
75 | //! args.color = Color::parse(value)?; |
76 | //! } |
77 | //! _ => { |
78 | //! return Err( |
//!                         format!("Unexpected flag: {}", arg.display()).into()
80 | //! ); |
81 | //! } |
82 | //! } |
83 | //! } else if let Some(mut shorts) = arg.to_short() { |
84 | //! while let Some(short) = shorts.next_flag() { |
85 | //! match short { |
86 | //! Ok('v' ) => { |
87 | //! args.verbosity += 1; |
88 | //! } |
89 | //! Ok('c' ) => { |
90 | //! let value = shorts.next_value_os(); |
91 | //! args.color = Color::parse(value)?; |
92 | //! } |
93 | //! Ok(c) => { |
94 | //! return Err(format!("Unexpected flag: -{c}" ).into()); |
95 | //! } |
96 | //! Err(e) => { |
97 | //! return Err(format!("Unexpected flag: -{}" , e.to_string_lossy()).into()); |
98 | //! } |
99 | //! } |
100 | //! } |
101 | //! } else { |
102 | //! args.paths.push(PathBuf::from(arg.to_value_os().to_owned())); |
103 | //! } |
104 | //! } |
105 | //! |
106 | //! Ok(args) |
107 | //! } |
108 | //! |
109 | //! let args = parse_args(["bin" , "--hello" , "world" ]); |
110 | //! println!("{args:?}" ); |
111 | //! ``` |
112 | |
113 | #![cfg_attr (docsrs, feature(doc_auto_cfg))] |
114 | #![warn (missing_docs)] |
115 | #![warn (clippy::print_stderr)] |
116 | #![warn (clippy::print_stdout)] |
117 | |
118 | mod ext; |
119 | |
120 | use std::ffi::OsStr; |
121 | use std::ffi::OsString; |
122 | |
123 | pub use std::io::SeekFrom; |
124 | |
125 | pub use ext::OsStrExt; |
126 | |
127 | /// Command-line arguments |
128 | #[derive (Default, Clone, Debug, PartialEq, Eq)] |
129 | pub struct RawArgs { |
130 | items: Vec<OsString>, |
131 | } |
132 | |
133 | impl RawArgs { |
134 | //// Create an argument list to parse |
135 | /// |
136 | /// <div class="warning"> |
137 | /// |
138 | /// **NOTE:** The argument returned will be the current binary. |
139 | /// |
140 | /// </div> |
141 | /// |
142 | /// # Example |
143 | /// |
144 | /// ```rust,no_run |
145 | /// # use std::path::PathBuf; |
146 | /// let raw = clap_lex::RawArgs::from_args(); |
147 | /// let mut cursor = raw.cursor(); |
148 | /// let _bin = raw.next_os(&mut cursor); |
149 | /// |
150 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
151 | /// println!("{paths:?}" ); |
152 | /// ``` |
153 | pub fn from_args() -> Self { |
154 | Self::new(std::env::args_os()) |
155 | } |
156 | |
157 | //// Create an argument list to parse |
158 | /// |
159 | /// # Example |
160 | /// |
161 | /// ```rust,no_run |
162 | /// # use std::path::PathBuf; |
163 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
164 | /// let mut cursor = raw.cursor(); |
165 | /// let _bin = raw.next_os(&mut cursor); |
166 | /// |
167 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
168 | /// println!("{paths:?}" ); |
169 | /// ``` |
170 | pub fn new(iter: impl IntoIterator<Item = impl Into<OsString>>) -> Self { |
171 | let iter = iter.into_iter(); |
172 | Self::from(iter) |
173 | } |
174 | |
175 | /// Create a cursor for walking the arguments |
176 | /// |
177 | /// # Example |
178 | /// |
179 | /// ```rust,no_run |
180 | /// # use std::path::PathBuf; |
181 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
182 | /// let mut cursor = raw.cursor(); |
183 | /// let _bin = raw.next_os(&mut cursor); |
184 | /// |
185 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
186 | /// println!("{paths:?}" ); |
187 | /// ``` |
188 | pub fn cursor(&self) -> ArgCursor { |
189 | ArgCursor::new() |
190 | } |
191 | |
192 | /// Advance the cursor, returning the next [`ParsedArg`] |
193 | pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> { |
194 | self.next_os(cursor).map(ParsedArg::new) |
195 | } |
196 | |
197 | /// Advance the cursor, returning a raw argument value. |
198 | pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> { |
199 | let next = self.items.get(cursor.cursor).map(|s| s.as_os_str()); |
200 | cursor.cursor = cursor.cursor.saturating_add(1); |
201 | next |
202 | } |
203 | |
204 | /// Return the next [`ParsedArg`] |
205 | pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> { |
206 | self.peek_os(cursor).map(ParsedArg::new) |
207 | } |
208 | |
209 | /// Return a raw argument value. |
210 | pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> { |
211 | self.items.get(cursor.cursor).map(|s| s.as_os_str()) |
212 | } |
213 | |
214 | /// Return all remaining raw arguments, advancing the cursor to the end |
215 | /// |
216 | /// # Example |
217 | /// |
218 | /// ```rust,no_run |
219 | /// # use std::path::PathBuf; |
220 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
221 | /// let mut cursor = raw.cursor(); |
222 | /// let _bin = raw.next_os(&mut cursor); |
223 | /// |
224 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
225 | /// println!("{paths:?}" ); |
226 | /// ``` |
227 | pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> { |
228 | let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str()); |
229 | cursor.cursor = self.items.len(); |
230 | remaining |
231 | } |
232 | |
233 | /// Adjust the cursor's position |
234 | pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) { |
235 | let pos = match pos { |
236 | SeekFrom::Start(pos) => pos, |
237 | SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64, |
238 | SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64, |
239 | }; |
240 | let pos = (pos as usize).min(self.items.len()); |
241 | cursor.cursor = pos; |
242 | } |
243 | |
244 | /// Inject arguments before the [`RawArgs::next`] |
245 | pub fn insert( |
246 | &mut self, |
247 | cursor: &ArgCursor, |
248 | insert_items: impl IntoIterator<Item = impl Into<OsString>>, |
249 | ) { |
250 | self.items.splice( |
251 | cursor.cursor..cursor.cursor, |
252 | insert_items.into_iter().map(Into::into), |
253 | ); |
254 | } |
255 | |
256 | /// Any remaining args? |
257 | pub fn is_end(&self, cursor: &ArgCursor) -> bool { |
258 | self.peek_os(cursor).is_none() |
259 | } |
260 | } |
261 | |
262 | impl<I, T> From<I> for RawArgs |
263 | where |
264 | I: Iterator<Item = T>, |
265 | T: Into<OsString>, |
266 | { |
267 | fn from(val: I) -> Self { |
268 | Self { |
269 | items: val.map(|x: T| x.into()).collect(), |
270 | } |
271 | } |
272 | } |
273 | |
/// Position within [`RawArgs`]
///
/// Cursors are created through [`RawArgs::cursor`] and are ordered by how far
/// along the argument list they point.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    /// Index of the next argument to be read
    cursor: usize,
}

impl ArgCursor {
    /// Build a cursor that points at the very first argument.
    fn new() -> Self {
        let cursor = 0;
        ArgCursor { cursor }
    }
}
285 | |
286 | /// Command-line Argument |
287 | #[derive (Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
288 | pub struct ParsedArg<'s> { |
289 | inner: &'s OsStr, |
290 | } |
291 | |
292 | impl<'s> ParsedArg<'s> { |
293 | fn new(inner: &'s OsStr) -> Self { |
294 | Self { inner } |
295 | } |
296 | |
297 | /// Argument is length of 0 |
298 | pub fn is_empty(&self) -> bool { |
299 | self.inner.is_empty() |
300 | } |
301 | |
302 | /// Does the argument look like a stdio argument (`-`) |
303 | pub fn is_stdio(&self) -> bool { |
304 | self.inner == "-" |
305 | } |
306 | |
307 | /// Does the argument look like an argument escape (`--`) |
308 | pub fn is_escape(&self) -> bool { |
309 | self.inner == "--" |
310 | } |
311 | |
312 | /// Does the argument look like a negative number? |
313 | /// |
314 | /// This won't parse the number in full but attempts to see if this looks |
315 | /// like something along the lines of `-3`, `-0.3`, or `-33.03` |
316 | pub fn is_negative_number(&self) -> bool { |
317 | self.to_value() |
318 | .ok() |
319 | .and_then(|s| Some(is_number(s.strip_prefix('-' )?))) |
320 | .unwrap_or_default() |
321 | } |
322 | |
323 | /// Treat as a long-flag |
324 | pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> { |
325 | let raw = self.inner; |
326 | let remainder = raw.strip_prefix("--" )?; |
327 | if remainder.is_empty() { |
328 | debug_assert!(self.is_escape()); |
329 | return None; |
330 | } |
331 | |
332 | let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=" ) { |
333 | (p0, Some(p1)) |
334 | } else { |
335 | (remainder, None) |
336 | }; |
337 | let flag = flag.to_str().ok_or(flag); |
338 | Some((flag, value)) |
339 | } |
340 | |
341 | /// Can treat as a long-flag |
342 | pub fn is_long(&self) -> bool { |
343 | self.inner.starts_with("--" ) && !self.is_escape() |
344 | } |
345 | |
346 | /// Treat as a short-flag |
347 | pub fn to_short(&self) -> Option<ShortFlags<'_>> { |
348 | if let Some(remainder_os) = self.inner.strip_prefix("-" ) { |
349 | if remainder_os.starts_with("-" ) { |
350 | None |
351 | } else if remainder_os.is_empty() { |
352 | debug_assert!(self.is_stdio()); |
353 | None |
354 | } else { |
355 | Some(ShortFlags::new(remainder_os)) |
356 | } |
357 | } else { |
358 | None |
359 | } |
360 | } |
361 | |
362 | /// Can treat as a short-flag |
363 | pub fn is_short(&self) -> bool { |
364 | self.inner.starts_with("-" ) && !self.is_stdio() && !self.inner.starts_with("--" ) |
365 | } |
366 | |
367 | /// Treat as a value |
368 | /// |
369 | /// <div class="warning"> |
370 | /// |
371 | /// **NOTE:** May return a flag or an escape. |
372 | /// |
373 | /// </div> |
374 | pub fn to_value_os(&self) -> &OsStr { |
375 | self.inner |
376 | } |
377 | |
378 | /// Treat as a value |
379 | /// |
380 | /// <div class="warning"> |
381 | /// |
382 | /// **NOTE:** May return a flag or an escape. |
383 | /// |
384 | /// </div> |
385 | pub fn to_value(&self) -> Result<&str, &OsStr> { |
386 | self.inner.to_str().ok_or(self.inner) |
387 | } |
388 | |
389 | /// Safely print an argument that may contain non-UTF8 content |
390 | /// |
391 | /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead. |
392 | pub fn display(&self) -> impl std::fmt::Display + '_ { |
393 | self.inner.to_string_lossy() |
394 | } |
395 | } |
396 | |
397 | /// Walk through short flags within a [`ParsedArg`] |
398 | #[derive (Clone, Debug)] |
399 | pub struct ShortFlags<'s> { |
400 | inner: &'s OsStr, |
401 | utf8_prefix: std::str::CharIndices<'s>, |
402 | invalid_suffix: Option<&'s OsStr>, |
403 | } |
404 | |
405 | impl<'s> ShortFlags<'s> { |
406 | fn new(inner: &'s OsStr) -> Self { |
407 | let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner); |
408 | let utf8_prefix = utf8_prefix.char_indices(); |
409 | Self { |
410 | inner, |
411 | utf8_prefix, |
412 | invalid_suffix, |
413 | } |
414 | } |
415 | |
416 | /// Move the iterator forward by `n` short flags |
417 | pub fn advance_by(&mut self, n: usize) -> Result<(), usize> { |
418 | for i in 0..n { |
419 | self.next().ok_or(i)?.map_err(|_| i)?; |
420 | } |
421 | Ok(()) |
422 | } |
423 | |
424 | /// No short flags left |
425 | pub fn is_empty(&self) -> bool { |
426 | self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty() |
427 | } |
428 | |
429 | /// Does the short flag look like a number |
430 | /// |
431 | /// Ideally call this before doing any iterator |
432 | pub fn is_negative_number(&self) -> bool { |
433 | self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str()) |
434 | } |
435 | |
436 | /// Advance the iterator, returning the next short flag on success |
437 | /// |
438 | /// On error, returns the invalid-UTF8 value |
439 | pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> { |
440 | if let Some((_, flag)) = self.utf8_prefix.next() { |
441 | return Some(Ok(flag)); |
442 | } |
443 | |
444 | if let Some(suffix) = self.invalid_suffix { |
445 | self.invalid_suffix = None; |
446 | return Some(Err(suffix)); |
447 | } |
448 | |
449 | None |
450 | } |
451 | |
452 | /// Advance the iterator, returning everything left as a value |
453 | pub fn next_value_os(&mut self) -> Option<&'s OsStr> { |
454 | if let Some((index, _)) = self.utf8_prefix.next() { |
455 | self.utf8_prefix = "" .char_indices(); |
456 | self.invalid_suffix = None; |
457 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
458 | let remainder = unsafe { ext::split_at(self.inner, index).1 }; |
459 | return Some(remainder); |
460 | } |
461 | |
462 | if let Some(suffix) = self.invalid_suffix { |
463 | self.invalid_suffix = None; |
464 | return Some(suffix); |
465 | } |
466 | |
467 | None |
468 | } |
469 | } |
470 | |
471 | impl<'s> Iterator for ShortFlags<'s> { |
472 | type Item = Result<char, &'s OsStr>; |
473 | |
474 | fn next(&mut self) -> Option<Self::Item> { |
475 | self.next_flag() |
476 | } |
477 | } |
478 | |
479 | fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) { |
480 | match b.try_str() { |
481 | Ok(s: &str) => (s, None), |
482 | Err(err: Utf8Error) => { |
483 | // SAFETY: `err.valid_up_to()`, which came from str::from_utf8(), is guaranteed |
484 | // to be a valid UTF8 boundary |
485 | let (valid: &OsStr, after_valid: &OsStr) = unsafe { ext::split_at(os:b, index:err.valid_up_to()) }; |
486 | let valid: &str = valid.try_str().unwrap(); |
487 | (valid, Some(after_valid)) |
488 | } |
489 | } |
490 | } |
491 | |
/// Cheap check for whether `arg` looks like an unsigned integer or float.
///
/// Accepts ASCII digits with at most one `.` and at most one exponent marker
/// (`e`/`E`), e.g. `1`, `1.`, `1.2`, `1.2e10`. Neither the `.` nor the exponent
/// marker may be the first character, the `.` must come before the exponent,
/// and the exponent marker must be followed by at least one more character.
/// The empty string is not a number.
fn is_number(arg: &str) -> bool {
    // Without this guard the loop below would accept an empty string, making a
    // bare `-` look like a negative number.
    if arg.is_empty() {
        return false;
    }

    let mut seen_dot = false;
    let mut position_of_e = None;
    for (i, c) in arg.bytes().enumerate() {
        match c {
            // Digits are always valid
            b'0'..=b'9' => {}

            // Allow a `.`, but only one, only if it comes before an
            // optional exponent, and only if it's not the first character.
            b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

            // Allow an exponent `e`/`E` but only at most one after the first
            // character.
            b'e' | b'E' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

            _ => return false,
        }
    }

    // Disallow `1e` which isn't a valid float since it doesn't actually have
    // an exponent.
    position_of_e.map_or(true, |i| i + 1 != arg.len())
}
523 | |