1//! A pathologically simple command line argument parser.
2//!
3//! Most argument parsers are declarative: you tell them what to parse,
4//! and they do it.
5//!
6//! This one provides you with a stream of options and values and lets you
7//! figure out the rest.
8//!
9//! ## Example
//! ```no_run
//! struct Args {
//!     thing: String,
//!     number: u32,
//!     shout: bool,
//! }
//!
//! fn parse_args() -> Result<Args, lexopt::Error> {
//!     use lexopt::prelude::*;
//!
//!     let mut thing = None;
//!     let mut number = 1;
//!     let mut shout = false;
//!     let mut parser = lexopt::Parser::from_env();
//!     while let Some(arg) = parser.next()? {
//!         match arg {
//!             Short('n') | Long("number") => {
//!                 number = parser.value()?.parse()?;
//!             }
//!             Long("shout") => {
//!                 shout = true;
//!             }
//!             Value(val) if thing.is_none() => {
//!                 thing = Some(val.string()?);
//!             }
//!             Long("help") => {
//!                 println!("Usage: hello [-n|--number=NUM] [--shout] THING");
//!                 std::process::exit(0);
//!             }
//!             _ => return Err(arg.unexpected()),
//!         }
//!     }
//!
//!     Ok(Args {
//!         thing: thing.ok_or("missing argument THING")?,
//!         number,
//!         shout,
//!     })
//! }
//!
//! fn main() -> Result<(), lexopt::Error> {
//!     let args = parse_args()?;
//!     let mut message = format!("Hello {}", args.thing);
//!     if args.shout {
//!         message = message.to_uppercase();
//!     }
//!     for _ in 0..args.number {
//!         println!("{}", message);
//!     }
//!     Ok(())
//! }
//! ```
//! Let's walk through this:
//! - We start parsing with [`Parser::from_env`].
//! - We call [`parser.next()`][Parser::next] in a loop to get all the arguments until they run out.
//! - We match on arguments. [`Short`][Arg::Short] and [`Long`][Arg::Long] indicate an option.
//! - To get the value that belongs to an option (like `10` in `-n 10`) we call [`parser.value()`][Parser::value].
//!   - This returns a standard [`OsString`][std::ffi::OsString].
//!   - For convenience, [`use lexopt::prelude::*`][prelude] adds a [`.parse()`][ValueExt::parse] method, analogous to [`str::parse`].
//!   - Calling `parser.value()` is how we tell `Parser` that `-n` takes a value at all.
//! - `Value` indicates a free-standing argument.
//!   - `if thing.is_none()` is a useful pattern for positional arguments. If we already found `thing` we pass it on to another case.
//!   - It also contains an `OsString`.
//!     - The [`.string()`][ValueExt::string] method decodes it into a plain `String`.
//! - If we don't know what to do with an argument we use [`return Err(arg.unexpected())`][Arg::unexpected] to turn it into an error message.
//! - Strings can be promoted to errors for custom error messages.
76
77#![forbid(unsafe_code)]
78#![warn(missing_docs, missing_debug_implementations, elided_lifetimes_in_paths)]
79#![allow(clippy::should_implement_trait)]
80
81use std::{
82 ffi::{OsStr, OsString},
83 fmt::Display,
84 mem::replace,
85 str::FromStr,
86};
87
88#[cfg(unix)]
89use std::os::unix::ffi::{OsStrExt, OsStringExt};
90#[cfg(target_os = "wasi")]
91use std::os::wasi::ffi::{OsStrExt, OsStringExt};
92#[cfg(windows)]
93use std::os::windows::ffi::{OsStrExt, OsStringExt};
94
/// The concrete iterator type in which the parser keeps its remaining arguments.
type InnerIter = std::vec::IntoIter<OsString>;

/// Collect `iter` into a `Vec` and return that vector's owning iterator.
///
/// The input iterator is consumed in full before this function returns.
fn make_iter(iter: impl Iterator<Item = OsString>) -> InnerIter {
    let collected: Vec<OsString> = iter.collect();
    collected.into_iter()
}
100
101/// A parser for command line arguments.
102#[derive(Debug, Clone)]
103pub struct Parser {
104 source: InnerIter,
105 state: State,
106 /// The last option we emitted.
107 last_option: LastOption,
108 /// The name of the command (argv\[0\]).
109 bin_name: Option<String>,
110}
111
/// What the parser is in the middle of between two calls.
#[derive(Debug, Clone)]
enum State {
    /// Nothing interesting is going on.
    None,
    /// A value is left over from `--option=value`.
    PendingValue(OsString),
    /// We're partway through a chain of short options such as `-abc`.
    ///
    /// Holds the bytes of the whole argument and the offset of the next unread byte.
    ///
    /// On Windows and other non-UTF8-OsString platforms this Vec should
    /// only ever contain valid UTF-8 (and could instead be a String).
    Shorts(Vec<u8>, usize),
    /// We're in the middle of -ab� on Windows (invalid UTF-16).
    #[cfg(windows)]
    ShortsU16(Vec<u16>, usize),
    /// `--` was seen, so no more options are coming.
    FinishedOpts,
}
129
/// The option that was emitted most recently.
///
/// It is kept around for error messages when an expected value is not found.
///
/// It doubles as storage for long options so that a `&str` can be handed out
/// (because `String` doesn't support pattern matching).
#[derive(Debug, Clone)]
enum LastOption {
    /// No option has been emitted yet.
    None,
    /// A short option, stored as its character.
    Short(char),
    /// A long option, stored as its text.
    Long(String),
}
141
/// A single item produced by [`Parser`]: either an option or a positional argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg<'a> {
    /// A short option, e.g. `Short('q')` for `-q`.
    Short(char),
    /// A long option, e.g. `Long("verbose")` for `--verbose`. (The dashes are not included.)
    Long(&'a str),
    /// A positional argument, e.g. `/dev/null`.
    Value(OsString),
}
152
153impl Parser {
154 /// Get the next option or positional argument.
155 ///
156 /// A return value of `Ok(None)` means the command line has been exhausted.
157 ///
158 /// Options that are not valid unicode are transformed with replacement
159 /// characters as by [`String::from_utf8_lossy`].
160 ///
161 /// # Errors
162 ///
163 /// [`Error::UnexpectedValue`] is returned if the last option had a
164 /// value that hasn't been consumed, as in `--option=value` or `-o=value`.
165 ///
166 /// It's possible to continue parsing after an error (but this is rarely useful).
167 pub fn next(&mut self) -> Result<Option<Arg<'_>>, Error> {
168 match self.state {
169 State::PendingValue(ref mut value) => {
170 // Last time we got `--long=value`, and `value` hasn't been used.
171 let value = replace(value, OsString::new());
172 self.state = State::None;
173 return Err(Error::UnexpectedValue {
174 option: self
175 .format_last_option()
176 .expect("Should only have pending value after long option"),
177 value,
178 });
179 }
180 State::Shorts(ref arg, ref mut pos) => {
181 // We're somewhere inside a -abc chain. Because we're in .next(),
182 // not .value(), we can assume that the next character is another option.
183 match first_codepoint(&arg[*pos..]) {
184 Ok(None) => {
185 self.state = State::None;
186 }
187 // If we find "-=[...]" we interpret it as an option.
188 // If we find "-o=..." then there's an unexpected value.
189 // ('-=' as an option exists, see https://linux.die.net/man/1/a2ps.)
190 // clap always interprets it as a short flag in this case, but
191 // that feels sloppy.
192 Ok(Some('=')) if *pos > 1 => {
193 return Err(Error::UnexpectedValue {
194 option: self.format_last_option().unwrap(),
195 value: self.optional_value().unwrap(),
196 });
197 }
198 Ok(Some(ch)) => {
199 *pos += ch.len_utf8();
200 self.last_option = LastOption::Short(ch);
201 return Ok(Some(Arg::Short(ch)));
202 }
203 Err(_) => {
204 // Advancing may allow recovery.
205 // This is a little iffy, there might be more bad unicode next.
206 // The standard library may turn multiple bytes into a single
207 // replacement character, but we don't imitate that.
208 *pos += 1;
209 self.last_option = LastOption::Short('�');
210 return Ok(Some(Arg::Short('�')));
211 }
212 }
213 }
214 #[cfg(windows)]
215 State::ShortsU16(ref arg, ref mut pos) => match first_utf16_codepoint(&arg[*pos..]) {
216 Ok(None) => {
217 self.state = State::None;
218 }
219 Ok(Some('=')) if *pos > 1 => {
220 return Err(Error::UnexpectedValue {
221 option: self.format_last_option().unwrap(),
222 value: self.optional_value().unwrap(),
223 });
224 }
225 Ok(Some(ch)) => {
226 *pos += ch.len_utf16();
227 self.last_option = LastOption::Short(ch);
228 return Ok(Some(Arg::Short(ch)));
229 }
230 Err(_) => {
231 *pos += 1;
232 self.last_option = LastOption::Short('�');
233 return Ok(Some(Arg::Short('�')));
234 }
235 },
236 State::FinishedOpts => {
237 return Ok(self.source.next().map(Arg::Value));
238 }
239 State::None => (),
240 }
241
242 match self.state {
243 State::None => (),
244 ref state => panic!("unexpected state {:?}", state),
245 }
246
247 let arg = match self.source.next() {
248 Some(arg) => arg,
249 None => return Ok(None),
250 };
251
252 if arg == "--" {
253 self.state = State::FinishedOpts;
254 return self.next();
255 }
256
257 #[cfg(any(unix, target_os = "wasi"))]
258 {
259 // Fast solution for platforms where OsStrings are just UTF-8-ish bytes
260 let mut arg = arg.into_vec();
261 if arg.starts_with(b"--") {
262 // Long options have two forms: --option and --option=value.
263 if let Some(ind) = arg.iter().position(|&b| b == b'=') {
264 // The value can be an OsString...
265 self.state = State::PendingValue(OsString::from_vec(arg[ind + 1..].into()));
266 arg.truncate(ind);
267 }
268 // ...but the option has to be a string.
269 // String::from_utf8_lossy().into_owned() would work, but its
270 // return type is Cow: if the original was valid a borrowed
271 // version is returned, and then into_owned() does an
272 // unnecessary copy.
273 // By trying String::from_utf8 first we avoid that copy if arg
274 // is already UTF-8 (which is most of the time).
275 // reqwest does a similar maneuver more efficiently with unsafe:
276 // https://github.com/seanmonstar/reqwest/blob/e6a1a09f0904e06de4ff1317278798c4ed28af66/src/async_impl/response.rs#L194
277 let option = match String::from_utf8(arg) {
278 Ok(text) => text,
279 Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
280 };
281 Ok(Some(self.set_long(option)))
282 } else if arg.len() > 1 && arg[0] == b'-' {
283 self.state = State::Shorts(arg, 1);
284 self.next()
285 } else {
286 Ok(Some(Arg::Value(OsString::from_vec(arg))))
287 }
288 }
289
290 #[cfg(not(any(unix, target_os = "wasi")))]
291 {
292 // Platforms where looking inside an OsString is harder
293
294 #[cfg(windows)]
295 {
296 // Fast path for Windows
297 let mut bytes = arg.encode_wide();
298 const DASH: u16 = b'-' as u16;
299 match (bytes.next(), bytes.next()) {
300 (Some(DASH), Some(_)) => {
301 // This is an option, we'll have to do more work.
302 // (We already checked for "--" earlier.)
303 }
304 _ => {
305 // Just a value, return early.
306 return Ok(Some(Arg::Value(arg)));
307 }
308 }
309 }
310
311 let mut arg = match arg.into_string() {
312 Ok(arg) => arg,
313 Err(arg) => {
314 // The argument is not valid unicode.
315 // If it's an option we'll have to do something nasty,
316 // otherwise we can return it as-is.
317
318 #[cfg(windows)]
319 {
320 // On Windows we can only get here if this is an option, otherwise
321 // we return earlier.
322 // Unlike on Unix, we can't efficiently process invalid unicode.
323 // Semantically it's UTF-16, but internally it's WTF-8 (a superset of UTF-8).
324 // So we only process the raw version here, when we know we really have to.
325 let mut arg: Vec<u16> = arg.encode_wide().collect();
326 const DASH: u16 = b'-' as u16;
327 const EQ: u16 = b'=' as u16;
328 if arg.starts_with(&[DASH, DASH]) {
329 if let Some(ind) = arg.iter().position(|&u| u == EQ) {
330 self.state =
331 State::PendingValue(OsString::from_wide(&arg[ind + 1..]));
332 arg.truncate(ind);
333 }
334 let long = self.set_long(String::from_utf16_lossy(&arg));
335 return Ok(Some(long));
336 } else {
337 assert!(arg.len() > 1);
338 assert_eq!(arg[0], DASH);
339 self.state = State::ShortsU16(arg, 1);
340 return self.next();
341 }
342 };
343
344 #[cfg(not(windows))]
345 {
346 // This code may be reachable on Hermit and SGX, but probably
347 // not on wasm32-unknown-unknown, which is unfortunate as that's
348 // the only one we can easily test.
349
350 // This allocates unconditionally, sadly.
351 let text = arg.to_string_lossy();
352 if text.starts_with('-') {
353 // Use the lossily patched version and hope for the best.
354 // This may be incorrect behavior. Our only other option
355 // is an error but I don't want to write complicated code
356 // I can't actually test.
357 // Please open an issue if this behavior affects you!
358 text.into_owned()
359 } else {
360 // It didn't look like an option, so return it as a value.
361 return Ok(Some(Arg::Value(arg)));
362 }
363 }
364 }
365 };
366
367 // The argument is valid unicode. This is the ideal version of the
368 // code, the previous mess was purely to deal with invalid unicode.
369 if arg.starts_with("--") {
370 if let Some(ind) = arg.find('=') {
371 self.state = State::PendingValue(arg[ind + 1..].into());
372 arg.truncate(ind);
373 }
374 Ok(Some(self.set_long(arg)))
375 } else if arg.starts_with('-') && arg != "-" {
376 self.state = State::Shorts(arg.into(), 1);
377 self.next()
378 } else {
379 Ok(Some(Arg::Value(arg.into())))
380 }
381 }
382 }
383
384 /// Get a value for an option.
385 ///
386 /// This function should normally be called right after seeing an option
387 /// that expects a value, with positional arguments being collected
388 /// using [`next()`][Parser::next].
389 ///
390 /// A value is collected even if it looks like an option
391 /// (i.e., starts with `-`).
392 ///
393 /// # Errors
394 ///
395 /// An [`Error::MissingValue`] is returned if the end of the command
396 /// line is reached.
397 pub fn value(&mut self) -> Result<OsString, Error> {
398 if let Some(value) = self.optional_value() {
399 return Ok(value);
400 }
401
402 if let Some(value) = self.source.next() {
403 return Ok(value);
404 }
405
406 Err(Error::MissingValue {
407 option: self.format_last_option(),
408 })
409 }
410
411 /// Gather multiple values for an option.
412 ///
413 /// This is used for options that take multiple arguments, such as a
414 /// `--command` flag that's invoked as `app --command echo 'Hello world'`.
415 ///
416 /// It will gather arguments until another option is found, or `--` is found, or
417 /// the end of the command line is reached. This differs from `.value()`, which
418 /// takes a value even if it looks like an option.
419 ///
420 /// An equals sign (`=`) will limit this to a single value. That means `-a=b c` and
421 /// `--opt=b c` will only yield `"b"` while `-a b c`, `-ab c` and `--opt b c` will
422 /// yield `"b"`, `"c"`.
423 ///
424 /// # Errors
425 /// If not at least one value is found then [`Error::MissingValue`] is returned.
426 ///
427 /// # Example
428 /// ```
429 /// # fn main() -> Result<(), lexopt::Error> {
430 /// # use lexopt::prelude::*;
431 /// # use std::ffi::OsString;
432 /// # use std::path::PathBuf;
433 /// # let mut parser = lexopt::Parser::from_args(&["a", "b", "-x", "one", "two", "three", "four"]);
434 /// let arguments: Vec<OsString> = parser.values()?.collect();
435 /// # assert_eq!(arguments, &["a", "b"]);
436 /// # let _ = parser.next();
437 /// let at_most_three_files: Vec<PathBuf> = parser.values()?.take(3).map(Into::into).collect();
438 /// # assert_eq!(parser.raw_args()?.as_slice(), &["four"]);
439 /// for value in parser.values()? {
440 /// // ...
441 /// }
442 /// # Ok(()) }
443 /// ```
444 pub fn values(&mut self) -> Result<ValuesIter<'_>, Error> {
445 // This code is designed so that just calling .values() doesn't consume
446 // any arguments as long as you don't use the iterator. It used to work
447 // differently.
448 // "--" is treated like an option and not consumed. This seems to me the
449 // least unreasonable behavior, and it's the easiest to implement.
450 if self.has_pending() || self.next_is_normal() {
451 Ok(ValuesIter {
452 took_first: false,
453 parser: Some(self),
454 })
455 } else {
456 Err(Error::MissingValue {
457 option: self.format_last_option(),
458 })
459 }
460 }
461
462 /// Inspect an argument and consume it if it's "normal" (not an option or --).
463 ///
464 /// Used by [`Parser::values`].
465 ///
466 /// This method should not be called while partway through processing an
467 /// argument.
468 fn next_if_normal(&mut self) -> Option<OsString> {
469 if self.next_is_normal() {
470 self.source.next()
471 } else {
472 None
473 }
474 }
475
476 /// Execute the check for next_if_normal().
477 fn next_is_normal(&self) -> bool {
478 assert!(!self.has_pending());
479 let arg = match self.source.as_slice().first() {
480 // There has to be a next argument.
481 None => return false,
482 Some(arg) => arg,
483 };
484 if let State::FinishedOpts = self.state {
485 // If we already found a -- then we're really not supposed to be here,
486 // but we shouldn't treat the next argument as an option.
487 return true;
488 }
489 if arg == "-" {
490 // "-" is the one argument with a leading '-' that's allowed.
491 return true;
492 }
493 #[cfg(any(unix, target_os = "wasi"))]
494 let lead_dash = arg.as_bytes().first() == Some(&b'-');
495 #[cfg(windows)]
496 let lead_dash = arg.encode_wide().next() == Some(b'-' as u16);
497 #[cfg(not(any(unix, target_os = "wasi", windows)))]
498 let lead_dash = arg.to_string_lossy().as_bytes().first() == Some(&b'-');
499
500 !lead_dash
501 }
502
503 /// Take raw arguments from the original command line.
504 ///
505 /// This returns an iterator of [`OsString`]s. Any arguments that are not
506 /// consumed are kept, so you can continue parsing after you're done with
507 /// the iterator.
508 ///
509 /// To inspect an argument without consuming it, use [`RawArgs::peek`] or
510 /// [`RawArgs::as_slice`].
511 ///
512 /// # Errors
513 ///
514 /// Returns an [`Error::UnexpectedValue`] if the last option had a left-over
515 /// argument, as in `--option=value`, `-ovalue`, or if it was midway through
516 /// an option chain, as in `-abc`. The iterator only yields whole arguments.
517 /// To avoid this, use [`try_raw_args()`][Parser::try_raw_args].
518 ///
519 /// After this error the method is guaranteed to succeed, as it consumes the
520 /// rest of the argument.
521 ///
522 /// # Example
523 /// As soon as a free-standing argument is found, consume the other arguments
524 /// as-is, and build them into a command.
525 /// ```
526 /// # fn main() -> Result<(), lexopt::Error> {
527 /// # use lexopt::prelude::*;
528 /// # use std::ffi::OsString;
529 /// # use std::path::PathBuf;
530 /// # let mut parser = lexopt::Parser::from_args(&["-x", "echo", "-n", "'Hello, world'"]);
531 /// # while let Some(arg) = parser.next()? {
532 /// # match arg {
533 /// Value(prog) => {
534 /// let args: Vec<_> = parser.raw_args()?.collect();
535 /// let command = std::process::Command::new(prog).args(args);
536 /// }
537 /// # _ => (), }} Ok(()) }
538 pub fn raw_args(&mut self) -> Result<RawArgs<'_>, Error> {
539 if let Some(value) = self.optional_value() {
540 return Err(Error::UnexpectedValue {
541 option: self.format_last_option().unwrap(),
542 value,
543 });
544 }
545
546 Ok(RawArgs(&mut self.source))
547 }
548
549 /// Take raw arguments from the original command line, *if* the current argument
550 /// has finished processing.
551 ///
552 /// Unlike [`raw_args()`][Parser::raw_args] this does not consume any value
553 /// in case of a left-over argument. This makes it safe to call at any time.
554 ///
555 /// It returns `None` exactly when [`optional_value()`][Parser::optional_value]
556 /// would return `Some`.
557 ///
558 /// Note: If no arguments are left then it returns an empty iterator (not `None`).
559 ///
560 /// # Example
561 /// Process arguments of the form `-123` as numbers. For a complete runnable version of
562 /// this example, see
563 /// [`examples/nonstandard.rs`](https://github.com/blyxxyz/lexopt/blob/e3754e6f24506afb42394602fc257b1ad9258d84/examples/nonstandard.rs).
564 /// ```
565 /// # fn main() -> Result<(), lexopt::Error> {
566 /// # use lexopt::prelude::*;
567 /// # use std::ffi::OsString;
568 /// # use std::path::PathBuf;
569 /// # let mut parser = lexopt::Parser::from_iter(&["-13"]);
570 /// fn parse_dashnum(parser: &mut lexopt::Parser) -> Option<u64> {
571 /// let mut raw = parser.try_raw_args()?;
572 /// let arg = raw.peek()?.to_str()?;
573 /// let num = arg.strip_prefix('-')?.parse::<u64>().ok()?;
574 /// raw.next(); // Consume the argument we just parsed
575 /// Some(num)
576 /// }
577 ///
578 /// loop {
579 /// if let Some(num) = parse_dashnum(&mut parser) {
580 /// println!("Got number {}", num);
581 /// } else if let Some(arg) = parser.next()? {
582 /// match arg {
583 /// // ...
584 /// # _ => (),
585 /// }
586 /// } else {
587 /// break;
588 /// }
589 /// }
590 /// # Ok(()) }
591 /// ```
592 pub fn try_raw_args(&mut self) -> Option<RawArgs<'_>> {
593 if self.has_pending() {
594 None
595 } else {
596 Some(RawArgs(&mut self.source))
597 }
598 }
599
600 /// Check whether we're halfway through an argument, or in other words,
601 /// if [`Parser::optional_value()`] would return `Some`.
602 fn has_pending(&self) -> bool {
603 match self.state {
604 State::None | State::FinishedOpts => false,
605 State::PendingValue(_) => true,
606 State::Shorts(ref arg, pos) => pos < arg.len(),
607 #[cfg(windows)]
608 State::ShortsU16(ref arg, pos) => pos < arg.len(),
609 }
610 }
611
612 #[inline(never)]
613 fn format_last_option(&self) -> Option<String> {
614 match self.last_option {
615 LastOption::None => None,
616 LastOption::Short(ch) => Some(format!("-{}", ch)),
617 LastOption::Long(ref option) => Some(option.clone()),
618 }
619 }
620
621 /// The name of the command, as in the zeroth argument of the process.
622 ///
623 /// This is intended for use in messages. If the name is not valid unicode
624 /// it will be sanitized with replacement characters as by
625 /// [`String::from_utf8_lossy`].
626 ///
627 /// To get the current executable, use [`std::env::current_exe`].
628 ///
629 /// # Example
630 /// ```
631 /// let mut parser = lexopt::Parser::from_env();
632 /// let bin_name = parser.bin_name().unwrap_or("myapp");
633 /// println!("{}: Some message", bin_name);
634 /// ```
635 pub fn bin_name(&self) -> Option<&str> {
636 Some(self.bin_name.as_ref()?)
637 }
638
639 /// Get a value only if it's concatenated to an option, as in `-ovalue` or
640 /// `--option=value` or `-o=value`, but not `-o value` or `--option value`.
641 pub fn optional_value(&mut self) -> Option<OsString> {
642 Some(self.raw_optional_value()?.0)
643 }
644
645 /// [`Parser::optional_value`], but indicate whether the value was joined
646 /// with an = sign. This matters for [`Parser::values`].
647 fn raw_optional_value(&mut self) -> Option<(OsString, bool)> {
648 match replace(&mut self.state, State::None) {
649 State::PendingValue(value) => Some((value, true)),
650 State::Shorts(mut arg, mut pos) => {
651 if pos >= arg.len() {
652 return None;
653 }
654 let mut had_eq_sign = false;
655 if arg[pos] == b'=' {
656 // -o=value.
657 // clap actually strips out all leading '='s, but that seems silly.
658 // We allow `-xo=value`. Python's argparse doesn't strip the = in that case.
659 pos += 1;
660 had_eq_sign = true;
661 }
662 arg.drain(..pos); // Reuse allocation
663 #[cfg(any(unix, target_os = "wasi"))]
664 {
665 Some((OsString::from_vec(arg), had_eq_sign))
666 }
667 #[cfg(not(any(unix, target_os = "wasi")))]
668 {
669 let arg = String::from_utf8(arg)
670 .expect("short option args on exotic platforms must be unicode");
671 Some((arg.into(), had_eq_sign))
672 }
673 }
674 #[cfg(windows)]
675 State::ShortsU16(arg, mut pos) => {
676 if pos >= arg.len() {
677 return None;
678 }
679 let mut had_eq_sign = false;
680 if arg[pos] == b'=' as u16 {
681 pos += 1;
682 had_eq_sign = true;
683 }
684 Some((OsString::from_wide(&arg[pos..]), had_eq_sign))
685 }
686 State::FinishedOpts => {
687 // Not really supposed to be here, but it's benign and not our fault
688 self.state = State::FinishedOpts;
689 None
690 }
691 State::None => None,
692 }
693 }
694
695 fn new(bin_name: Option<OsString>, source: InnerIter) -> Parser {
696 Parser {
697 source,
698 state: State::None,
699 last_option: LastOption::None,
700 bin_name: bin_name.map(|s| match s.into_string() {
701 Ok(text) => text,
702 Err(text) => text.to_string_lossy().into_owned(),
703 }),
704 }
705 }
706
707 /// Create a parser from the environment using [`std::env::args_os`].
708 ///
709 /// This is the usual way to create a `Parser`.
710 pub fn from_env() -> Parser {
711 let mut source = make_iter(std::env::args_os());
712 Parser::new(source.next(), source)
713 }
714
715 // The collision with `FromIterator::from_iter` is a bit unfortunate.
716 // This name is used because:
717 // - `from_args()` was taken, and changing its behavior without changing
718 // its signature would be evil.
719 // - structopt also had a method by that name, so there's a precedent.
720 // (clap_derive doesn't.)
721 // - I couldn't think of a better one.
722 // When this name was chosen `FromIterator` could not actually be implemented.
723 // It can be implemented now, but I'm not sure there's a reason to.
724
725 /// Create a parser from an iterator. This is useful for testing among other things.
726 ///
727 /// The first item from the iterator **must** be the binary name, as from [`std::env::args_os`].
728 ///
729 /// The iterator is consumed immediately.
730 ///
731 /// # Example
732 /// ```
733 /// let mut parser = lexopt::Parser::from_iter(&["myapp", "-n", "10", "./foo.bar"]);
734 /// ```
735 pub fn from_iter<I>(args: I) -> Parser
736 where
737 I: IntoIterator,
738 I::Item: Into<OsString>,
739 {
740 let mut args = make_iter(args.into_iter().map(Into::into));
741 Parser::new(args.next(), args)
742 }
743
744 /// Create a parser from an iterator that does **not** include the binary name.
745 ///
746 /// The iterator is consumed immediately.
747 ///
748 /// [`bin_name()`](`Parser::bin_name`) will return `None`. Consider using
749 /// [`Parser::from_iter`] instead.
750 pub fn from_args<I>(args: I) -> Parser
751 where
752 I: IntoIterator,
753 I::Item: Into<OsString>,
754 {
755 Parser::new(None, make_iter(args.into_iter().map(Into::into)))
756 }
757
758 /// Store a long option so the caller can borrow it.
759 fn set_long(&mut self, option: String) -> Arg<'_> {
760 self.last_option = LastOption::Long(option);
761 match self.last_option {
762 LastOption::Long(ref option) => Arg::Long(&option[2..]),
763 _ => unreachable!(),
764 }
765 }
766}
767
768impl Arg<'_> {
769 /// Convert an unexpected argument into an error.
770 pub fn unexpected(self) -> Error {
771 match self {
772 Arg::Short(short: char) => Error::UnexpectedOption(format!("-{}", short)),
773 Arg::Long(long: &str) => Error::UnexpectedOption(format!("--{}", long)),
774 Arg::Value(value: OsString) => Error::UnexpectedArgument(value),
775 }
776 }
777}
778
779/// An iterator for multiple option-arguments, returned by [`Parser::values`].
780///
781/// It's guaranteed to yield at least one value.
782#[derive(Debug)]
783pub struct ValuesIter<'a> {
784 took_first: bool,
785 parser: Option<&'a mut Parser>,
786}
787
788impl Iterator for ValuesIter<'_> {
789 type Item = OsString;
790
791 fn next(&mut self) -> Option<Self::Item> {
792 let parser: &mut &mut Parser = self.parser.as_mut()?;
793 if self.took_first {
794 parser.next_if_normal()
795 } else if let Some((value: OsString, had_eq_sign: bool)) = parser.raw_optional_value() {
796 if had_eq_sign {
797 self.parser = None;
798 }
799 self.took_first = true;
800 Some(value)
801 } else {
802 let value: OsString = parser
803 .next_if_normal()
804 .expect(msg:"ValuesIter must yield at least one value");
805 self.took_first = true;
806 Some(value)
807 }
808 }
809}
810
811/// An iterator for the remaining raw arguments, returned by [`Parser::raw_args`].
812#[derive(Debug)]
813pub struct RawArgs<'a>(&'a mut InnerIter);
814
815impl Iterator for RawArgs<'_> {
816 type Item = OsString;
817
818 fn next(&mut self) -> Option<Self::Item> {
819 self.0.next()
820 }
821}
822
823impl RawArgs<'_> {
824 /// Return a reference to the next() value without consuming it.
825 ///
826 /// An argument you peek but do not consume will still be seen by `Parser`
827 /// if you resume parsing.
828 ///
829 /// See [`Iterator::peekable`], [`std::iter::Peekable::peek`].
830 pub fn peek(&self) -> Option<&OsStr> {
831 Some(self.0.as_slice().first()?.as_os_str())
832 }
833
834 /// Consume and return the next argument if a condition is true.
835 ///
836 /// See [`std::iter::Peekable::next_if`].
837 pub fn next_if(&mut self, func: impl FnOnce(&OsStr) -> bool) -> Option<OsString> {
838 match self.peek() {
839 Some(arg) if func(arg) => self.next(),
840 _ => None,
841 }
842 }
843
844 /// Return the remaining arguments as a slice.
845 pub fn as_slice(&self) -> &[OsString] {
846 self.0.as_slice()
847 }
848}
849
850// These would make sense:
851// - fn RawArgs::iter(&self)
852// - impl IntoIterator for &RawArgs
853// - impl AsRef<[OsString]> for RawArgs
854// But they're niche and constrain future design.
855// Let's leave them out for now.
856// (Open question: should iter() return std::slice::Iter<OsString> and get
857// an optimized .nth() and so on for free, or should it return a novel type
858// that yields &OsStr?)
859
/// An error during argument parsing.
///
/// `From<String>` and `From<&str>` are implemented for it, for easy ad-hoc
/// error messages.
//
// The MSRV is why this is not #[non_exhaustive]. I'm hoping no more
// variants will turn out to be needed: this seems reasonable, if the scope
// of the library doesn't change. Worst case scenario it can be stuffed inside
// Error::Custom.
pub enum Error {
    /// An option argument was expected but was not found.
    MissingValue {
        /// The most recently emitted option.
        option: Option<String>,
    },

    /// An unexpected option was found.
    UnexpectedOption(String),

    /// A positional argument was found when none was expected.
    UnexpectedArgument(OsString),

    /// An option had a value when none was expected.
    UnexpectedValue {
        /// The option.
        option: String,
        /// The value.
        value: OsString,
    },

    /// Parsing a value failed. Returned by methods on [`ValueExt`].
    ParsingFailed {
        /// The string that failed to parse.
        value: String,
        /// The error returned while parsing.
        error: Box<dyn std::error::Error + Send + Sync + 'static>,
    },

    /// A value was found that was not valid unicode.
    ///
    /// This can be returned by the methods on [`ValueExt`].
    NonUnicodeValue(OsString),

    /// For custom error messages in application code.
    Custom(Box<dyn std::error::Error + Send + Sync + 'static>),
}
906
907impl Display for Error {
908 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
909 use crate::Error::*;
910 match self {
911 MissingValue { option: None } => write!(f, "missing argument"),
912 MissingValue {
913 option: Some(option),
914 } => {
915 write!(f, "missing argument for option '{}'", option)
916 }
917 UnexpectedOption(option) => write!(f, "invalid option '{}'", option),
918 UnexpectedArgument(value) => write!(f, "unexpected argument {:?}", value),
919 UnexpectedValue { option, value } => {
920 write!(
921 f,
922 "unexpected argument for option '{}': {:?}",
923 option, value
924 )
925 }
926 NonUnicodeValue(value) => write!(f, "argument is invalid unicode: {:?}", value),
927 ParsingFailed { value, error } => {
928 write!(f, "cannot parse argument {:?}: {}", value, error)
929 }
930 Custom(err) => write!(f, "{}", err),
931 }
932 }
933}
934
935// This is printed when returning an error from main(), so defer to Display
936impl std::fmt::Debug for Error {
937 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
938 Display::fmt(self, f)
939 }
940}
941
942impl std::error::Error for Error {
943 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
944 match self {
945 Error::ParsingFailed { error: &Box, .. } | Error::Custom(error: &Box) => Some(error.as_ref()),
946 _ => None,
947 }
948 }
949}
950
951impl From<String> for Error {
952 fn from(msg: String) -> Self {
953 Error::Custom(msg.into())
954 }
955}
956
957impl<'a> From<&'a str> for Error {
958 fn from(msg: &'a str) -> Self {
959 Error::Custom(msg.into())
960 }
961}
962
963/// For [`OsString::into_string`], so it may be used with the try (`?`) operator.
964///
965/// [`ValueExt::string`] is the new preferred method because it's compatible with
966/// catch-all error types like `anyhow::Error`.
967impl From<OsString> for Error {
968 fn from(arg: OsString) -> Self {
969 Error::NonUnicodeValue(arg)
970 }
971}
972
mod private {
    use std::ffi::OsString;

    /// Marker trait that lives in a private module. It is a supertrait of
    /// `ValueExt`, and `OsString` is its only implementor here.
    pub trait Sealed {}

    impl Sealed for OsString {}
}
977
978/// An optional extension trait with methods for parsing [`OsString`]s.
979///
980/// They may fail in two cases:
981/// - The value cannot be decoded because it's invalid unicode
982/// ([`Error::NonUnicodeValue`])
983/// - The value can be decoded, but parsing fails ([`Error::ParsingFailed`])
984///
985/// If parsing fails the error will be wrapped in lexopt's own [`Error`] type.
986pub trait ValueExt: private::Sealed {
987 /// Decode the value and parse it using [`FromStr`].
988 ///
989 /// This will fail if the value is not valid unicode or if the subsequent
990 /// parsing fails.
991 fn parse<T: FromStr>(&self) -> Result<T, Error>
992 where
993 T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;
994
995 /// Decode the value and parse it using a custom function.
996 fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error>
997 where
998 F: FnOnce(&str) -> Result<T, E>,
999 E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;
1000
1001 // There is no parse_os_with() because I can't think of any useful
1002 // fallible operations on an OsString. Typically you'd either decode it,
1003 // use it as is, or do an infallible conversion to a PathBuf or such.
1004 //
1005 // If you have a use for parse_os_with() please open an issue with an
1006 // example.
1007
1008 /// Convert the `OsString` into a [`String`] if it's valid Unicode.
1009 ///
1010 /// This is like [`OsString::into_string`] but returns an
1011 /// [`Error::NonUnicodeValue`] on error instead of the original `OsString`.
1012 /// This makes it easier to propagate the failure with libraries like
1013 /// `anyhow`.
1014 fn string(self) -> Result<String, Error>;
1015}
1016
1017impl ValueExt for OsString {
1018 fn parse<T: FromStr>(&self) -> Result<T, Error>
1019 where
1020 T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
1021 {
1022 self.parse_with(FromStr::from_str)
1023 }
1024
1025 fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error>
1026 where
1027 F: FnOnce(&str) -> Result<T, E>,
1028 E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
1029 {
1030 match self.to_str() {
1031 Some(text) => match func(text) {
1032 Ok(value) => Ok(value),
1033 Err(err) => Err(Error::ParsingFailed {
1034 value: text.to_owned(),
1035 error: err.into(),
1036 }),
1037 },
1038 None => Err(Error::NonUnicodeValue(self.into())),
1039 }
1040 }
1041
1042 fn string(self) -> Result<String, Error> {
1043 match self.into_string() {
1044 Ok(string) => Ok(string),
1045 Err(raw) => Err(Error::NonUnicodeValue(raw)),
1046 }
1047 }
1048}
1049
1050/// A small prelude for processing arguments.
1051///
1052/// It allows you to write `Short`/`Long`/`Value` without an [`Arg`] prefix
1053/// and adds convenience methods to [`OsString`].
1054///
1055/// If this is used it's best to import it inside a function, not in module
1056/// scope:
1057/// ```
1058/// # struct Args;
1059/// fn parse_args() -> Result<Args, lexopt::Error> {
1060/// use lexopt::prelude::*;
1061/// // ...
1062/// # Ok(Args)
1063/// }
1064/// ```
1065pub mod prelude {
1066 pub use super::Arg::*;
1067 pub use super::ValueExt;
1068}
1069
/// Take the first codepoint of a bytestring. On error, return the first
/// (and therefore in some way invalid) byte/code unit.
///
/// Returns `Ok(None)` for an empty input.
///
/// The rest of the bytestring does not have to be valid unicode.
fn first_codepoint(bytes: &[u8]) -> Result<Option<char>, u8> {
    // We only need the first 4 bytes; shorter inputs are used whole.
    let bytes = bytes.get(..4).unwrap_or(bytes);
    let text = match std::str::from_utf8(bytes) {
        Ok(text) => text,
        // Only a later part is invalid: keep the prefix the decoder reported
        // as valid, so re-decoding that prefix cannot fail.
        Err(err) if err.valid_up_to() > 0 => {
            std::str::from_utf8(&bytes[..err.valid_up_to()]).unwrap()
        }
        // Nothing decoded at all. An empty slice is valid UTF-8 and is handled
        // above, so `bytes` is non-empty and indexing is safe here.
        Err(_) => return Err(bytes[0]),
    };
    Ok(text.chars().next())
}
1086
#[cfg(windows)]
/// Take the first codepoint of a sequence of UTF-16 code units. On error,
/// return the first code unit. An empty input gives `Ok(None)`.
fn first_utf16_codepoint(units: &[u16]) -> Result<Option<char>, u16> {
    let first = std::char::decode_utf16(units.iter().cloned()).next();
    match first {
        None => Ok(None),
        Some(Ok(ch)) => Ok(Some(ch)),
        // The decoder produced an item, so `units` is non-empty here.
        Some(Err(_)) => Err(units[0]),
    }
}
1096
1097#[cfg(test)]
1098mod tests {
1099 use super::prelude::*;
1100 use super::*;
1101
1102 fn parse(args: &'static str) -> Parser {
1103 Parser::from_args(args.split_whitespace().map(bad_string))
1104 }
1105
1106 /// Specialized backport of matches!()
1107 macro_rules! assert_matches {
1108 ($expression: expr, $( $pattern: pat )|+) => {
1109 match $expression {
1110 $( $pattern )|+ => (),
1111 _ => panic!(
1112 "{:?} does not match {:?}",
1113 stringify!($expression),
1114 stringify!($( $pattern )|+)
1115 ),
1116 }
1117 };
1118 }
1119
1120 #[test]
1121 fn test_basic() -> Result<(), Error> {
1122 let mut p = parse("-n 10 foo - -- baz -qux");
1123 assert_eq!(p.next()?.unwrap(), Short('n'));
1124 assert_eq!(p.value()?.parse::<i32>()?, 10);
1125 assert_eq!(p.next()?.unwrap(), Value("foo".into()));
1126 assert_eq!(p.next()?.unwrap(), Value("-".into()));
1127 assert_eq!(p.next()?.unwrap(), Value("baz".into()));
1128 assert_eq!(p.next()?.unwrap(), Value("-qux".into()));
1129 assert_eq!(p.next()?, None);
1130 assert_eq!(p.next()?, None);
1131 assert_eq!(p.next()?, None);
1132 Ok(())
1133 }
1134
1135 #[test]
1136 fn test_combined() -> Result<(), Error> {
1137 let mut p = parse("-abc -fvalue -xfvalue");
1138 assert_eq!(p.next()?.unwrap(), Short('a'));
1139 assert_eq!(p.next()?.unwrap(), Short('b'));
1140 assert_eq!(p.next()?.unwrap(), Short('c'));
1141 assert_eq!(p.next()?.unwrap(), Short('f'));
1142 assert_eq!(p.value()?, "value");
1143 assert_eq!(p.next()?.unwrap(), Short('x'));
1144 assert_eq!(p.next()?.unwrap(), Short('f'));
1145 assert_eq!(p.value()?, "value");
1146 assert_eq!(p.next()?, None);
1147 Ok(())
1148 }
1149
1150 #[test]
1151 fn test_long() -> Result<(), Error> {
1152 let mut p = parse("--foo --bar=qux --foobar=qux=baz");
1153 assert_eq!(p.next()?.unwrap(), Long("foo"));
1154 assert_eq!(p.next()?.unwrap(), Long("bar"));
1155 assert_eq!(p.value()?, "qux");
1156 assert_eq!(p.next()?.unwrap(), Long("foobar"));
1157 match p.next().unwrap_err() {
1158 Error::UnexpectedValue { option, value } => {
1159 assert_eq!(option, "--foobar");
1160 assert_eq!(value, "qux=baz");
1161 }
1162 _ => panic!(),
1163 }
1164 assert_eq!(p.next()?, None);
1165 Ok(())
1166 }
1167
1168 #[test]
1169 fn test_dash_args() -> Result<(), Error> {
1170 // "--" should indicate the end of the options
1171 let mut p = parse("-x -- -y");
1172 assert_eq!(p.next()?.unwrap(), Short('x'));
1173 assert_eq!(p.next()?.unwrap(), Value("-y".into()));
1174 assert_eq!(p.next()?, None);
1175
1176 // ...unless it's an argument of an option
1177 let mut p = parse("-x -- -y");
1178 assert_eq!(p.next()?.unwrap(), Short('x'));
1179 assert_eq!(p.value()?, "--");
1180 assert_eq!(p.next()?.unwrap(), Short('y'));
1181 assert_eq!(p.next()?, None);
1182
1183 // "-" is a valid value that should not be treated as an option
1184 let mut p = parse("-x - -y");
1185 assert_eq!(p.next()?.unwrap(), Short('x'));
1186 assert_eq!(p.next()?.unwrap(), Value("-".into()));
1187 assert_eq!(p.next()?.unwrap(), Short('y'));
1188 assert_eq!(p.next()?, None);
1189
1190 // '-' is a silly and hard to use short option, but other parsers treat
1191 // it like an option in this position
1192 let mut p = parse("-x-y");
1193 assert_eq!(p.next()?.unwrap(), Short('x'));
1194 assert_eq!(p.next()?.unwrap(), Short('-'));
1195 assert_eq!(p.next()?.unwrap(), Short('y'));
1196 assert_eq!(p.next()?, None);
1197
1198 Ok(())
1199 }
1200
1201 #[test]
1202 fn test_missing_value() -> Result<(), Error> {
1203 let mut p = parse("-o");
1204 assert_eq!(p.next()?.unwrap(), Short('o'));
1205 match p.value() {
1206 Err(Error::MissingValue {
1207 option: Some(option),
1208 }) => assert_eq!(option, "-o"),
1209 _ => panic!(),
1210 }
1211
1212 let mut q = parse("--out");
1213 assert_eq!(q.next()?.unwrap(), Long("out"));
1214 match q.value() {
1215 Err(Error::MissingValue {
1216 option: Some(option),
1217 }) => assert_eq!(option, "--out"),
1218 _ => panic!(),
1219 }
1220
1221 let mut r = parse("");
1222 assert_matches!(r.value(), Err(Error::MissingValue { option: None }));
1223
1224 Ok(())
1225 }
1226
1227 #[test]
1228 fn test_weird_args() -> Result<(), Error> {
1229 let mut p = Parser::from_args(&[
1230 "", "--=", "--=3", "-", "-x", "--", "-", "-x", "--", "", "-", "-x",
1231 ]);
1232 assert_eq!(p.next()?.unwrap(), Value(OsString::from("")));
1233
1234 // These are weird and questionable, but this seems to be the standard
1235 // interpretation
1236 // GNU getopt_long and argparse complain that it could be an abbreviation
1237 // of every single long option
1238 // clap complains that "--" is not expected, which matches its treatment
1239 // of unknown long options
1240 assert_eq!(p.next()?.unwrap(), Long(""));
1241 assert_eq!(p.value()?, OsString::from(""));
1242 assert_eq!(p.next()?.unwrap(), Long(""));
1243 assert_eq!(p.value()?, OsString::from("3"));
1244
1245 assert_eq!(p.next()?.unwrap(), Value(OsString::from("-")));
1246 assert_eq!(p.next()?.unwrap(), Short('x'));
1247 assert_eq!(p.value()?, OsString::from("--"));
1248 assert_eq!(p.next()?.unwrap(), Value(OsString::from("-")));
1249 assert_eq!(p.next()?.unwrap(), Short('x'));
1250 assert_eq!(p.next()?.unwrap(), Value(OsString::from("")));
1251 assert_eq!(p.next()?.unwrap(), Value(OsString::from("-")));
1252 assert_eq!(p.next()?.unwrap(), Value(OsString::from("-x")));
1253 assert_eq!(p.next()?, None);
1254
1255 #[cfg(any(unix, target_os = "wasi", windows))]
1256 {
1257 let mut q = parse("--=@");
1258 assert_eq!(q.next()?.unwrap(), Long(""));
1259 assert_eq!(q.value()?, bad_string("@"));
1260 assert_eq!(q.next()?, None);
1261 }
1262
1263 let mut r = parse("");
1264 assert_eq!(r.next()?, None);
1265
1266 Ok(())
1267 }
1268
1269 #[test]
1270 fn test_unicode() -> Result<(), Error> {
1271 let mut p = parse("-aµ --µ=10 µ --foo=µ");
1272 assert_eq!(p.next()?.unwrap(), Short('a'));
1273 assert_eq!(p.next()?.unwrap(), Short('µ'));
1274 assert_eq!(p.next()?.unwrap(), Long("µ"));
1275 assert_eq!(p.value()?, "10");
1276 assert_eq!(p.next()?.unwrap(), Value("µ".into()));
1277 assert_eq!(p.next()?.unwrap(), Long("foo"));
1278 assert_eq!(p.value()?, "µ");
1279 Ok(())
1280 }
1281
1282 #[cfg(any(unix, target_os = "wasi", windows))]
1283 #[test]
1284 fn test_mixed_invalid() -> Result<(), Error> {
1285 let mut p = parse("--foo=@@@");
1286 assert_eq!(p.next()?.unwrap(), Long("foo"));
1287 assert_eq!(p.value()?, bad_string("@@@"));
1288
1289 let mut q = parse("-💣@@@");
1290 assert_eq!(q.next()?.unwrap(), Short('💣'));
1291 assert_eq!(q.value()?, bad_string("@@@"));
1292
1293 let mut r = parse("-f@@@");
1294 assert_eq!(r.next()?.unwrap(), Short('f'));
1295 assert_eq!(r.next()?.unwrap(), Short('�'));
1296 assert_eq!(r.next()?.unwrap(), Short('�'));
1297 assert_eq!(r.next()?.unwrap(), Short('�'));
1298 assert_eq!(r.next()?, None);
1299
1300 let mut s = parse("--foo=bar=@@@");
1301 assert_eq!(s.next()?.unwrap(), Long("foo"));
1302 assert_eq!(s.value()?, bad_string("bar=@@@"));
1303
1304 Ok(())
1305 }
1306
1307 #[cfg(any(unix, target_os = "wasi", windows))]
1308 #[test]
1309 fn test_separate_invalid() -> Result<(), Error> {
1310 let mut p = parse("--foo @@@");
1311 assert_eq!(p.next()?.unwrap(), Long("foo"));
1312 assert_eq!(p.value()?, bad_string("@@@"));
1313 Ok(())
1314 }
1315
1316 #[cfg(any(unix, target_os = "wasi", windows))]
1317 #[test]
1318 fn test_invalid_long_option() -> Result<(), Error> {
1319 let mut p = parse("--@=10");
1320 assert_eq!(p.next()?.unwrap(), Long("�"));
1321 assert_eq!(p.value().unwrap(), OsString::from("10"));
1322 assert_eq!(p.next()?, None);
1323
1324 let mut q = parse("--@");
1325 assert_eq!(q.next()?.unwrap(), Long("�"));
1326 assert_eq!(q.next()?, None);
1327
1328 Ok(())
1329 }
1330
1331 #[test]
1332 fn short_opt_equals_sign() -> Result<(), Error> {
1333 let mut p = parse("-a=b");
1334 assert_eq!(p.next()?.unwrap(), Short('a'));
1335 assert_eq!(p.value()?, OsString::from("b"));
1336 assert_eq!(p.next()?, None);
1337
1338 let mut p = parse("-a=b");
1339 assert_eq!(p.next()?.unwrap(), Short('a'));
1340 match p.next().unwrap_err() {
1341 Error::UnexpectedValue { option, value } => {
1342 assert_eq!(option, "-a");
1343 assert_eq!(value, "b");
1344 }
1345 _ => panic!(),
1346 }
1347 assert_eq!(p.next()?, None);
1348
1349 let mut p = parse("-a=");
1350 assert_eq!(p.next()?.unwrap(), Short('a'));
1351 assert_eq!(p.value()?, OsString::from(""));
1352 assert_eq!(p.next()?, None);
1353
1354 let mut p = parse("-a=");
1355 assert_eq!(p.next()?.unwrap(), Short('a'));
1356 match p.next().unwrap_err() {
1357 Error::UnexpectedValue { option, value } => {
1358 assert_eq!(option, "-a");
1359 assert_eq!(value, "");
1360 }
1361 _ => panic!(),
1362 }
1363 assert_eq!(p.next()?, None);
1364
1365 let mut p = parse("-=");
1366 assert_eq!(p.next()?.unwrap(), Short('='));
1367 assert_eq!(p.next()?, None);
1368
1369 let mut p = parse("-=a");
1370 assert_eq!(p.next()?.unwrap(), Short('='));
1371 assert_eq!(p.value()?, "a");
1372
1373 Ok(())
1374 }
1375
1376 #[cfg(any(unix, target_os = "wasi", windows))]
1377 #[test]
1378 fn short_opt_equals_sign_invalid() -> Result<(), Error> {
1379 let mut p = parse("-a=@");
1380 assert_eq!(p.next()?.unwrap(), Short('a'));
1381 assert_eq!(p.value()?, bad_string("@"));
1382 assert_eq!(p.next()?, None);
1383
1384 let mut p = parse("-a=@");
1385 assert_eq!(p.next()?.unwrap(), Short('a'));
1386 match p.next().unwrap_err() {
1387 Error::UnexpectedValue { option, value } => {
1388 assert_eq!(option, "-a");
1389 assert_eq!(value, bad_string("@"));
1390 }
1391 _ => panic!(),
1392 }
1393 assert_eq!(p.next()?, None);
1394
1395 let mut p = parse("-=@");
1396 assert_eq!(p.next()?.unwrap(), Short('='));
1397 assert_eq!(p.value()?, bad_string("@"));
1398
1399 Ok(())
1400 }
1401
1402 #[test]
1403 fn multi_values() -> Result<(), Error> {
1404 for &case in &["-a b c d", "-ab c d", "-a b c d --", "--a b c d"] {
1405 let mut p = parse(case);
1406 p.next()?.unwrap();
1407 let mut iter = p.values()?;
1408 let values: Vec<_> = iter.by_ref().collect();
1409 assert_eq!(values, &["b", "c", "d"]);
1410 assert!(iter.next().is_none());
1411 assert!(p.next()?.is_none());
1412 }
1413
1414 for &case in &["-a=b c", "--a=b c"] {
1415 let mut p = parse(case);
1416 p.next()?.unwrap();
1417 let mut iter = p.values()?;
1418 let values: Vec<_> = iter.by_ref().collect();
1419 assert_eq!(values, &["b"]);
1420 assert!(iter.next().is_none());
1421 assert_eq!(p.next()?.unwrap(), Value("c".into()));
1422 assert!(p.next()?.is_none());
1423 }
1424
1425 for &case in &["-a", "--a", "-a -b", "-a -- b", "-a --"] {
1426 let mut p = parse(case);
1427 p.next()?.unwrap();
1428 assert!(p.values().is_err());
1429 assert!(p.next().is_ok());
1430 assert!(p.next().unwrap().is_none());
1431 }
1432
1433 for &case in &["-a=", "--a="] {
1434 let mut p = parse(case);
1435 p.next()?.unwrap();
1436 let mut iter = p.values()?;
1437 let values: Vec<_> = iter.by_ref().collect();
1438 assert_eq!(values, &[""]);
1439 assert!(iter.next().is_none());
1440 assert!(p.next()?.is_none());
1441 }
1442
1443 // Test that .values() does not eagerly consume the first value
1444 for &case in &["-a=b", "--a=b", "-a b"] {
1445 let mut p = parse(case);
1446 p.next()?.unwrap();
1447 assert!(p.values().is_ok());
1448 assert_eq!(p.value()?, "b");
1449 }
1450
1451 {
1452 let mut p = parse("-ab");
1453 p.next()?.unwrap();
1454 assert!(p.values().is_ok());
1455 assert_eq!(p.next()?.unwrap(), Short('b'));
1456 }
1457
1458 Ok(())
1459 }
1460
1461 #[test]
1462 fn raw_args() -> Result<(), Error> {
1463 let mut p = parse("-a b c d");
1464 assert!(p.try_raw_args().is_some());
1465 assert_eq!(p.raw_args()?.collect::<Vec<_>>(), &["-a", "b", "c", "d"]);
1466 assert!(p.try_raw_args().is_some());
1467 assert!(p.next()?.is_none());
1468 assert!(p.try_raw_args().is_some());
1469 assert_eq!(p.raw_args()?.as_slice().len(), 0);
1470
1471 let mut p = parse("-ab c d");
1472 p.next()?;
1473 assert!(p.try_raw_args().is_none());
1474 assert!(p.raw_args().is_err());
1475 assert_eq!(p.try_raw_args().unwrap().collect::<Vec<_>>(), &["c", "d"]);
1476 assert!(p.next()?.is_none());
1477 assert_eq!(p.try_raw_args().unwrap().as_slice().len(), 0);
1478
1479 let mut p = parse("-a b c d");
1480 assert_eq!(p.raw_args()?.take(3).collect::<Vec<_>>(), &["-a", "b", "c"]);
1481 assert_eq!(p.next()?, Some(Value("d".into())));
1482 assert!(p.next()?.is_none());
1483
1484 let mut p = parse("a");
1485 let mut it = p.raw_args()?;
1486 assert_eq!(it.peek(), Some("a".as_ref()));
1487 assert_eq!(it.next_if(|_| false), None);
1488 assert_eq!(p.next()?, Some(Value("a".into())));
1489 assert!(p.next()?.is_none());
1490
1491 Ok(())
1492 }
1493
1494 #[test]
1495 fn bin_name() {
1496 assert_eq!(
1497 Parser::from_iter(&["foo", "bar", "baz"]).bin_name(),
1498 Some("foo")
1499 );
1500 assert_eq!(Parser::from_args(&["foo", "bar", "baz"]).bin_name(), None);
1501 assert_eq!(Parser::from_iter(&[] as &[&str]).bin_name(), None);
1502 assert_eq!(Parser::from_iter(&[""]).bin_name(), Some(""));
1503 assert!(Parser::from_env().bin_name().is_some());
1504 #[cfg(any(unix, target_os = "wasi", windows))]
1505 assert_eq!(
1506 Parser::from_iter(vec![bad_string("foo@bar")]).bin_name(),
1507 Some("foo�bar")
1508 );
1509 }
1510
1511 #[test]
1512 fn test_value_ext() -> Result<(), Error> {
1513 let s = OsString::from("-10");
1514 assert_eq!(s.parse::<i32>()?, -10);
1515 assert_eq!(
1516 s.parse_with(|s| match s {
1517 "-10" => Ok(0),
1518 _ => Err("bad"),
1519 })?,
1520 0,
1521 );
1522 match s.parse::<u32>() {
1523 Err(Error::ParsingFailed { value, .. }) => assert_eq!(value, "-10"),
1524 _ => panic!(),
1525 }
1526 match s.parse_with(|s| match s {
1527 "11" => Ok(0_i32),
1528 _ => Err("bad"),
1529 }) {
1530 Err(Error::ParsingFailed { value, .. }) => assert_eq!(value, "-10"),
1531 _ => panic!(),
1532 }
1533 assert_eq!(s.string()?, "-10");
1534 Ok(())
1535 }
1536
1537 #[cfg(any(unix, target_os = "wasi", windows))]
1538 #[test]
1539 fn test_value_ext_invalid() -> Result<(), Error> {
1540 let s = bad_string("foo@");
1541 assert_matches!(s.parse::<i32>(), Err(Error::NonUnicodeValue(_)));
1542 assert_matches!(
1543 s.parse_with(<f32 as FromStr>::from_str),
1544 Err(Error::NonUnicodeValue(_))
1545 );
1546 assert_matches!(s.string(), Err(Error::NonUnicodeValue(_)));
1547 Ok(())
1548 }
1549
1550 #[test]
1551 fn test_first_codepoint() {
1552 assert_eq!(first_codepoint(b"foo").unwrap(), Some('f'));
1553 assert_eq!(first_codepoint(b"").unwrap(), None);
1554 assert_eq!(first_codepoint(b"f\xFF\xFF").unwrap(), Some('f'));
1555 assert_eq!(first_codepoint(b"\xC2\xB5bar").unwrap(), Some('µ'));
1556 first_codepoint(b"\xFF").unwrap_err();
1557 assert_eq!(first_codepoint(b"foo\xC2\xB5").unwrap(), Some('f'));
1558 }
1559
1560 /// Transform @ characters into invalid unicode.
1561 fn bad_string(text: &str) -> OsString {
1562 #[cfg(any(unix, target_os = "wasi"))]
1563 {
1564 let mut text = text.as_bytes().to_vec();
1565 for ch in &mut text {
1566 if *ch == b'@' {
1567 *ch = b'\xFF';
1568 }
1569 }
1570 OsString::from_vec(text)
1571 }
1572 #[cfg(windows)]
1573 {
1574 let mut out = Vec::new();
1575 for ch in text.chars() {
1576 if ch == '@' {
1577 out.push(0xD800);
1578 } else {
1579 let mut buf = [0; 2];
1580 out.extend(&*ch.encode_utf16(&mut buf));
1581 }
1582 }
1583 OsString::from_wide(&out)
1584 }
1585 #[cfg(not(any(unix, target_os = "wasi", windows)))]
1586 {
1587 if text.contains('@') {
1588 unimplemented!("Don't know how to create invalid OsStrings on this platform");
1589 }
1590 text.into()
1591 }
1592 }
1593
1594 /// Basic exhaustive testing of short combinations of "interesting"
1595 /// arguments. They should not panic, not hang, and pass some checks.
1596 ///
1597 /// The advantage compared to full fuzzing is that it runs on all platforms
1598 /// and together with the other tests. cargo-fuzz doesn't work on Windows
1599 /// and requires a special incantation.
1600 ///
1601 /// A disadvantage is that it's still limited by arguments I could think of
1602 /// and only does very short sequences. Another is that it's bad at
1603 /// reporting failure, though the println!() helps.
1604 ///
1605 /// This test takes a while to run.
1606 #[test]
1607 fn basic_fuzz() {
1608 #[cfg(any(windows, unix, target_os = "wasi"))]
1609 const VOCABULARY: &[&str] = &[
1610 "", "-", "--", "---", "a", "-a", "-aa", "@", "-@", "-a@", "-@a", "--a", "--@", "--a=a",
1611 "--a=", "--a=@", "--@=a", "--=", "--=@", "--=a", "-@@", "-a=a", "-a=", "-=", "-a-",
1612 ];
1613 #[cfg(not(any(windows, unix, target_os = "wasi")))]
1614 const VOCABULARY: &[&str] = &[
1615 "", "-", "--", "---", "a", "-a", "-aa", "--a", "--a=a", "--a=", "--=", "--=a", "-a=a",
1616 "-a=", "-=", "-a-",
1617 ];
1618 exhaust(Parser::new(None, Vec::new().into_iter()), 0);
1619 let vocabulary: Vec<OsString> = VOCABULARY.iter().map(|&s| bad_string(s)).collect();
1620 let mut permutations = vec![vec![]];
1621 for _ in 0..3 {
1622 let mut new = Vec::new();
1623 for old in permutations {
1624 for word in &vocabulary {
1625 let mut extended = old.clone();
1626 extended.push(word);
1627 new.push(extended);
1628 }
1629 }
1630 permutations = new;
1631 for permutation in &permutations {
1632 println!("{:?}", permutation);
1633 let p = Parser::from_args(permutation);
1634 exhaust(p, 0);
1635 }
1636 }
1637 }
1638
1639 /// Run many sequences of methods on a Parser.
1640 fn exhaust(mut parser: Parser, depth: u16) {
1641 if depth > 100 {
1642 panic!("Stuck in loop");
1643 }
1644
1645 // has_pending() == optional_value().is_some()
1646 if parser.has_pending() {
1647 {
1648 let mut parser = parser.clone();
1649 assert!(parser.try_raw_args().is_none());
1650 assert!(parser.try_raw_args().is_none());
1651 assert!(parser.raw_args().is_err());
1652 // Recovery possible
1653 assert!(parser.raw_args().is_ok());
1654 assert!(parser.try_raw_args().is_some());
1655 }
1656
1657 {
1658 let mut parser = parser.clone();
1659 assert!(parser.optional_value().is_some());
1660 exhaust(parser, depth + 1);
1661 }
1662 } else {
1663 let prev_state = parser.state.clone();
1664 let prev_remaining = parser.source.as_slice().len();
1665 assert!(parser.optional_value().is_none());
1666 assert!(parser.raw_args().is_ok());
1667 assert!(parser.try_raw_args().is_some());
1668 // Verify state transitions
1669 match prev_state {
1670 State::None | State::PendingValue(_) => {
1671 assert_matches!(parser.state, State::None);
1672 }
1673 State::Shorts(arg, pos) => {
1674 assert_eq!(pos, arg.len());
1675 assert_matches!(parser.state, State::None);
1676 }
1677 #[cfg(windows)]
1678 State::ShortsU16(arg, pos) => {
1679 assert_eq!(pos, arg.len());
1680 assert_matches!(parser.state, State::None);
1681 }
1682 State::FinishedOpts => assert_matches!(parser.state, State::FinishedOpts),
1683 }
1684 // No arguments were consumed
1685 assert_eq!(parser.source.as_slice().len(), prev_remaining);
1686 }
1687
1688 {
1689 let mut parser = parser.clone();
1690 match parser.next() {
1691 Ok(None) => {
1692 assert_matches!(parser.state, State::None | State::FinishedOpts);
1693 assert_eq!(parser.source.as_slice().len(), 0);
1694 }
1695 _ => exhaust(parser, depth + 1),
1696 }
1697 }
1698
1699 {
1700 let mut parser = parser.clone();
1701 match parser.value() {
1702 Err(_) => {
1703 assert_matches!(parser.state, State::None | State::FinishedOpts);
1704 assert_eq!(parser.source.as_slice().len(), 0);
1705 }
1706 Ok(_) => {
1707 assert_matches!(parser.state, State::None | State::FinishedOpts);
1708 exhaust(parser, depth + 1);
1709 }
1710 }
1711 }
1712
1713 {
1714 match parser.values() {
1715 Err(_) => (),
1716 Ok(iter) => {
1717 assert!(iter.count() > 0);
1718 exhaust(parser, depth + 1);
1719 }
1720 }
1721 }
1722 }
1723}
1724