| 1 | //! A pathologically simple command line argument parser. |
| 2 | //! |
| 3 | //! Most argument parsers are declarative: you tell them what to parse, |
| 4 | //! and they do it. |
| 5 | //! |
| 6 | //! This one provides you with a stream of options and values and lets you |
| 7 | //! figure out the rest. |
| 8 | //! |
| 9 | //! ## Example |
| 10 | //! ```no_run |
| 11 | //! struct Args { |
| 12 | //! thing: String, |
| 13 | //! number: u32, |
| 14 | //! shout: bool, |
| 15 | //! } |
| 16 | //! |
| 17 | //! fn parse_args() -> Result<Args, lexopt::Error> { |
| 18 | //! use lexopt::prelude::*; |
| 19 | //! |
| 20 | //! let mut thing = None; |
| 21 | //! let mut number = 1; |
| 22 | //! let mut shout = false; |
| 23 | //! let mut parser = lexopt::Parser::from_env(); |
| 24 | //! while let Some(arg) = parser.next()? { |
| 25 | //! match arg { |
| 26 | //! Short('n' ) | Long("number" ) => { |
| 27 | //! number = parser.value()?.parse()?; |
| 28 | //! } |
| 29 | //! Long("shout" ) => { |
| 30 | //! shout = true; |
| 31 | //! } |
| 32 | //! Value(val) if thing.is_none() => { |
| 33 | //! thing = Some(val.string()?); |
| 34 | //! } |
| 35 | //! Long("help" ) => { |
| 36 | //! println!("Usage: hello [-n|--number=NUM] [--shout] THING" ); |
| 37 | //! std::process::exit(0); |
| 38 | //! } |
| 39 | //! _ => return Err(arg.unexpected()), |
| 40 | //! } |
| 41 | //! } |
| 42 | //! |
| 43 | //! Ok(Args { |
| 44 | //! thing: thing.ok_or("missing argument THING" )?, |
| 45 | //! number, |
| 46 | //! shout, |
| 47 | //! }) |
| 48 | //! } |
| 49 | //! |
| 50 | //! fn main() -> Result<(), lexopt::Error> { |
| 51 | //! let args = parse_args()?; |
| 52 | //! let mut message = format!("Hello {}" , args.thing); |
| 53 | //! if args.shout { |
| 54 | //! message = message.to_uppercase(); |
| 55 | //! } |
| 56 | //! for _ in 0..args.number { |
| 57 | //! println!("{}" , message); |
| 58 | //! } |
| 59 | //! Ok(()) |
| 60 | //! } |
| 61 | //! ``` |
| 62 | //! Let's walk through this: |
| 63 | //! - We start parsing with [`Parser::from_env`]. |
| 64 | //! - We call [`parser.next()`][Parser::next] in a loop to get all the arguments until they run out. |
| 65 | //! - We match on arguments. [`Short`][Arg::Short] and [`Long`][Arg::Long] indicate an option. |
//! - To get the value that belongs to an option (like `10` in `-n 10`) we call [`parser.value()`][Parser::value].
//!   - This returns a standard [`OsString`][std::ffi::OsString].
//!   - For convenience, [`use lexopt::prelude::*`][prelude] adds a [`.parse()`][ValueExt::parse] method, analogous to [`str::parse`].
//!   - Calling `parser.value()` is how we tell `Parser` that `-n` takes a value at all.
//! - `Value` indicates a free-standing argument.
//!   - `if thing.is_none()` is a useful pattern for positional arguments. If we already found `thing` we pass it on to another case.
//!   - It also contains an `OsString`.
//!     - The [`.string()`][ValueExt::string] method decodes it into a plain `String`.
| 74 | //! - If we don't know what to do with an argument we use [`return Err(arg.unexpected())`][Arg::unexpected] to turn it into an error message. |
| 75 | //! - Strings can be promoted to errors for custom error messages. |
| 76 | |
| 77 | #![forbid (unsafe_code)] |
| 78 | #![warn (missing_docs, missing_debug_implementations, elided_lifetimes_in_paths)] |
| 79 | #![allow (clippy::should_implement_trait)] |
| 80 | |
| 81 | use std::{ |
| 82 | ffi::{OsStr, OsString}, |
| 83 | fmt::Display, |
| 84 | mem::replace, |
| 85 | str::FromStr, |
| 86 | }; |
| 87 | |
| 88 | #[cfg (unix)] |
| 89 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; |
| 90 | #[cfg (target_os = "wasi" )] |
| 91 | use std::os::wasi::ffi::{OsStrExt, OsStringExt}; |
| 92 | #[cfg (windows)] |
| 93 | use std::os::windows::ffi::{OsStrExt, OsStringExt}; |
| 94 | |
/// The concrete iterator type the parser pulls its arguments from.
type InnerIter = std::vec::IntoIter<OsString>;

/// Drain `iter` into a `Vec` up front and return that vector's owning iterator.
fn make_iter(iter: impl Iterator<Item = OsString>) -> InnerIter {
    let collected: Vec<OsString> = iter.collect();
    collected.into_iter()
}
| 100 | |
/// A parser for command line arguments.
#[derive(Debug, Clone)]
pub struct Parser {
    /// The arguments that have not been read yet.
    source: InnerIter,
    /// What, if anything, is left over from the argument currently being processed.
    state: State,
    /// The last option we emitted.
    last_option: LastOption,
    /// The name of the command (argv\[0\]).
    bin_name: Option<String>,
}
| 111 | |
/// What the parser is in the middle of between two calls.
#[derive(Debug, Clone)]
enum State {
    /// Nothing interesting is going on.
    None,
    /// We have a value left over from --option=value.
    PendingValue(OsString),
    /// We're in the middle of -abc.
    ///
    /// The `Vec<u8>` holds the bytes of the whole argument (leading dash
    /// included) and the `usize` is the byte offset of the next unread
    /// character.
    ///
    /// On Windows and other non-UTF8-OsString platforms this Vec should
    /// only ever contain valid UTF-8 (and could instead be a String).
    Shorts(Vec<u8>, usize),
    #[cfg(windows)]
    /// We're in the middle of -ab� on Windows (invalid UTF-16).
    ///
    /// Same layout as `Shorts`, but counted in UTF-16 code units.
    ShortsU16(Vec<u16>, usize),
    /// We saw -- and know no more options are coming.
    FinishedOpts,
}
| 129 | |
/// We use this to keep track of the last emitted option, for error messages when
/// an expected value is not found.
///
/// We also use this as storage for long options so we can hand out &str
/// (because String doesn't support pattern matching).
#[derive(Debug, Clone)]
enum LastOption {
    /// No option has been emitted yet.
    None,
    /// The last option was a short option; the dash is not stored.
    Short(char),
    /// The last option was a long option, stored together with its leading `--`.
    Long(String),
}
| 141 | |
/// A command line argument found by [`Parser`], either an option or a positional argument.
///
/// The lifetime ties [`Arg::Long`] to the parser, which owns the text of the
/// long option it hands out.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg<'a> {
    /// A short option, e.g. `Short('q')` for `-q`.
    Short(char),
    /// A long option, e.g. `Long("verbose")` for `--verbose`. (The dashes are not included.)
    Long(&'a str),
    /// A positional argument, e.g. `/dev/null`.
    Value(OsString),
}
| 152 | |
| 153 | impl Parser { |
/// Get the next option or positional argument.
///
/// A return value of `Ok(None)` means the command line has been exhausted.
///
/// Options that are not valid unicode are transformed with replacement
/// characters as by [`String::from_utf8_lossy`].
///
/// # Errors
///
/// [`Error::UnexpectedValue`] is returned if the last option had a
/// value that hasn't been consumed, as in `--option=value` or `-o=value`.
///
/// It's possible to continue parsing after an error (but this is rarely useful).
pub fn next(&mut self) -> Result<Option<Arg<'_>>, Error> {
    // Step 1: deal with whatever the previous call left behind in `self.state`.
    // Every arm either returns, or resets the state to `State::None` and falls
    // through so that a fresh argument is read below.
    match self.state {
        State::PendingValue(ref mut value) => {
            // Last time we got `--long=value`, and `value` hasn't been used.
            let value = replace(value, OsString::new());
            self.state = State::None;
            return Err(Error::UnexpectedValue {
                option: self
                    .format_last_option()
                    .expect("Should only have pending value after long option"),
                value,
            });
        }
        State::Shorts(ref arg, ref mut pos) => {
            // We're somewhere inside a -abc chain. Because we're in .next(),
            // not .value(), we can assume that the next character is another option.
            match first_codepoint(&arg[*pos..]) {
                Ok(None) => {
                    // The chain is used up; go on to the next argument.
                    self.state = State::None;
                }
                // If we find "-=[...]" we interpret it as an option.
                // If we find "-o=..." then there's an unexpected value.
                // ('-=' as an option exists, see https://linux.die.net/man/1/a2ps.)
                // clap always interprets it as a short flag in this case, but
                // that feels sloppy.
                //
                // A chain starts at position 1 (right after the dash), so
                // `*pos > 1` means at least one option was already emitted.
                Ok(Some('=')) if *pos > 1 => {
                    return Err(Error::UnexpectedValue {
                        option: self.format_last_option().unwrap(),
                        // Taking the value also clears the state.
                        value: self.optional_value().unwrap(),
                    });
                }
                Ok(Some(ch)) => {
                    *pos += ch.len_utf8();
                    self.last_option = LastOption::Short(ch);
                    return Ok(Some(Arg::Short(ch)));
                }
                Err(_) => {
                    // Advancing may allow recovery.
                    // This is a little iffy, there might be more bad unicode next.
                    // The standard library may turn multiple bytes into a single
                    // replacement character, but we don't imitate that.
                    *pos += 1;
                    self.last_option = LastOption::Short('�');
                    return Ok(Some(Arg::Short('�')));
                }
            }
        }
        // Same logic as `State::Shorts`, but stepping through UTF-16 code units.
        #[cfg(windows)]
        State::ShortsU16(ref arg, ref mut pos) => match first_utf16_codepoint(&arg[*pos..]) {
            Ok(None) => {
                self.state = State::None;
            }
            Ok(Some('=')) if *pos > 1 => {
                return Err(Error::UnexpectedValue {
                    option: self.format_last_option().unwrap(),
                    value: self.optional_value().unwrap(),
                });
            }
            Ok(Some(ch)) => {
                *pos += ch.len_utf16();
                self.last_option = LastOption::Short(ch);
                return Ok(Some(Arg::Short(ch)));
            }
            Err(_) => {
                *pos += 1;
                self.last_option = LastOption::Short('�');
                return Ok(Some(Arg::Short('�')));
            }
        },
        State::FinishedOpts => {
            // After `--` every remaining argument is handed out as a value, untouched.
            return Ok(self.source.next().map(Arg::Value));
        }
        State::None => (),
    }

    // Sanity check: every arm above that did not return has reset the state,
    // so anything other than `State::None` here is a bug in this function.
    match self.state {
        State::None => (),
        ref state => panic!("unexpected state {:?}", state),
    }

    // Step 2: read a fresh argument and classify it.
    let arg = match self.source.next() {
        Some(arg) => arg,
        None => return Ok(None),
    };

    if arg == "--" {
        // A bare `--` ends option parsing. It is not emitted itself; the
        // recursive call returns the argument after it as a value.
        self.state = State::FinishedOpts;
        return self.next();
    }

    #[cfg(any(unix, target_os = "wasi"))]
    {
        // Fast solution for platforms where OsStrings are just UTF-8-ish bytes
        let mut arg = arg.into_vec();
        if arg.starts_with(b"--") {
            // Long options have two forms: --option and --option=value.
            if let Some(ind) = arg.iter().position(|&b| b == b'=') {
                // The value can be an OsString...
                self.state = State::PendingValue(OsString::from_vec(arg[ind + 1..].into()));
                arg.truncate(ind);
            }
            // ...but the option has to be a string.
            // String::from_utf8_lossy().into_owned() would work, but its
            // return type is Cow: if the original was valid a borrowed
            // version is returned, and then into_owned() does an
            // unnecessary copy.
            // By trying String::from_utf8 first we avoid that copy if arg
            // is already UTF-8 (which is most of the time).
            // reqwest does a similar maneuver more efficiently with unsafe:
            // https://github.com/seanmonstar/reqwest/blob/e6a1a09f0904e06de4ff1317278798c4ed28af66/src/async_impl/response.rs#L194
            let option = match String::from_utf8(arg) {
                Ok(text) => text,
                Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
            };
            Ok(Some(self.set_long(option)))
        } else if arg.len() > 1 && arg[0] == b'-' {
            // Store the chain (position 1 skips the dash) and re-enter to
            // emit its first short option.
            self.state = State::Shorts(arg, 1);
            self.next()
        } else {
            // Either a lone "-" or something without a leading dash.
            Ok(Some(Arg::Value(OsString::from_vec(arg))))
        }
    }

    #[cfg(not(any(unix, target_os = "wasi")))]
    {
        // Platforms where looking inside an OsString is harder

        #[cfg(windows)]
        {
            // Fast path for Windows
            let mut bytes = arg.encode_wide();
            const DASH: u16 = b'-' as u16;
            match (bytes.next(), bytes.next()) {
                (Some(DASH), Some(_)) => {
                    // This is an option, we'll have to do more work.
                    // (We already checked for "--" earlier.)
                }
                _ => {
                    // Just a value, return early.
                    return Ok(Some(Arg::Value(arg)));
                }
            }
        }

        let mut arg = match arg.into_string() {
            Ok(arg) => arg,
            Err(arg) => {
                // The argument is not valid unicode.
                // If it's an option we'll have to do something nasty,
                // otherwise we can return it as-is.

                #[cfg(windows)]
                {
                    // On Windows we can only get here if this is an option, otherwise
                    // we return earlier.
                    // Unlike on Unix, we can't efficiently process invalid unicode.
                    // Semantically it's UTF-16, but internally it's WTF-8 (a superset of UTF-8).
                    // So we only process the raw version here, when we know we really have to.
                    let mut arg: Vec<u16> = arg.encode_wide().collect();
                    const DASH: u16 = b'-' as u16;
                    const EQ: u16 = b'=' as u16;
                    if arg.starts_with(&[DASH, DASH]) {
                        if let Some(ind) = arg.iter().position(|&u| u == EQ) {
                            self.state =
                                State::PendingValue(OsString::from_wide(&arg[ind + 1..]));
                            arg.truncate(ind);
                        }
                        let long = self.set_long(String::from_utf16_lossy(&arg));
                        return Ok(Some(long));
                    } else {
                        // Guaranteed by the fast path above.
                        assert!(arg.len() > 1);
                        assert_eq!(arg[0], DASH);
                        self.state = State::ShortsU16(arg, 1);
                        return self.next();
                    }
                };

                #[cfg(not(windows))]
                {
                    // This code may be reachable on Hermit and SGX, but probably
                    // not on wasm32-unknown-unknown, which is unfortunate as that's
                    // the only one we can easily test.

                    // This allocates unconditionally, sadly.
                    let text = arg.to_string_lossy();
                    if text.starts_with('-') {
                        // Use the lossily patched version and hope for the best.
                        // This may be incorrect behavior. Our only other option
                        // is an error but I don't want to write complicated code
                        // I can't actually test.
                        // Please open an issue if this behavior affects you!
                        text.into_owned()
                    } else {
                        // It didn't look like an option, so return it as a value.
                        return Ok(Some(Arg::Value(arg)));
                    }
                }
            }
        };

        // The argument is valid unicode. This is the ideal version of the
        // code, the previous mess was purely to deal with invalid unicode.
        if arg.starts_with("--") {
            if let Some(ind) = arg.find('=') {
                self.state = State::PendingValue(arg[ind + 1..].into());
                arg.truncate(ind);
            }
            Ok(Some(self.set_long(arg)))
        } else if arg.starts_with('-') && arg != "-" {
            self.state = State::Shorts(arg.into(), 1);
            self.next()
        } else {
            Ok(Some(Arg::Value(arg.into())))
        }
    }
}
| 383 | |
| 384 | /// Get a value for an option. |
| 385 | /// |
| 386 | /// This function should normally be called right after seeing an option |
| 387 | /// that expects a value, with positional arguments being collected |
| 388 | /// using [`next()`][Parser::next]. |
| 389 | /// |
| 390 | /// A value is collected even if it looks like an option |
| 391 | /// (i.e., starts with `-`). |
| 392 | /// |
| 393 | /// # Errors |
| 394 | /// |
| 395 | /// An [`Error::MissingValue`] is returned if the end of the command |
| 396 | /// line is reached. |
| 397 | pub fn value(&mut self) -> Result<OsString, Error> { |
| 398 | if let Some(value) = self.optional_value() { |
| 399 | return Ok(value); |
| 400 | } |
| 401 | |
| 402 | if let Some(value) = self.source.next() { |
| 403 | return Ok(value); |
| 404 | } |
| 405 | |
| 406 | Err(Error::MissingValue { |
| 407 | option: self.format_last_option(), |
| 408 | }) |
| 409 | } |
| 410 | |
| 411 | /// Gather multiple values for an option. |
| 412 | /// |
| 413 | /// This is used for options that take multiple arguments, such as a |
| 414 | /// `--command` flag that's invoked as `app --command echo 'Hello world'`. |
| 415 | /// |
| 416 | /// It will gather arguments until another option is found, or `--` is found, or |
| 417 | /// the end of the command line is reached. This differs from `.value()`, which |
| 418 | /// takes a value even if it looks like an option. |
| 419 | /// |
| 420 | /// An equals sign (`=`) will limit this to a single value. That means `-a=b c` and |
| 421 | /// `--opt=b c` will only yield `"b"` while `-a b c`, `-ab c` and `--opt b c` will |
| 422 | /// yield `"b"`, `"c"`. |
| 423 | /// |
| 424 | /// # Errors |
| 425 | /// If not at least one value is found then [`Error::MissingValue`] is returned. |
| 426 | /// |
| 427 | /// # Example |
| 428 | /// ``` |
| 429 | /// # fn main() -> Result<(), lexopt::Error> { |
| 430 | /// # use lexopt::prelude::*; |
| 431 | /// # use std::ffi::OsString; |
| 432 | /// # use std::path::PathBuf; |
| 433 | /// # let mut parser = lexopt::Parser::from_args(&["a" , "b" , "-x" , "one" , "two" , "three" , "four" ]); |
| 434 | /// let arguments: Vec<OsString> = parser.values()?.collect(); |
| 435 | /// # assert_eq!(arguments, &["a" , "b" ]); |
| 436 | /// # let _ = parser.next(); |
| 437 | /// let at_most_three_files: Vec<PathBuf> = parser.values()?.take(3).map(Into::into).collect(); |
| 438 | /// # assert_eq!(parser.raw_args()?.as_slice(), &["four" ]); |
| 439 | /// for value in parser.values()? { |
| 440 | /// // ... |
| 441 | /// } |
| 442 | /// # Ok(()) } |
| 443 | /// ``` |
| 444 | pub fn values(&mut self) -> Result<ValuesIter<'_>, Error> { |
| 445 | // This code is designed so that just calling .values() doesn't consume |
| 446 | // any arguments as long as you don't use the iterator. It used to work |
| 447 | // differently. |
| 448 | // "--" is treated like an option and not consumed. This seems to me the |
| 449 | // least unreasonable behavior, and it's the easiest to implement. |
| 450 | if self.has_pending() || self.next_is_normal() { |
| 451 | Ok(ValuesIter { |
| 452 | took_first: false, |
| 453 | parser: Some(self), |
| 454 | }) |
| 455 | } else { |
| 456 | Err(Error::MissingValue { |
| 457 | option: self.format_last_option(), |
| 458 | }) |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | /// Inspect an argument and consume it if it's "normal" (not an option or --). |
| 463 | /// |
| 464 | /// Used by [`Parser::values`]. |
| 465 | /// |
| 466 | /// This method should not be called while partway through processing an |
| 467 | /// argument. |
| 468 | fn next_if_normal(&mut self) -> Option<OsString> { |
| 469 | if self.next_is_normal() { |
| 470 | self.source.next() |
| 471 | } else { |
| 472 | None |
| 473 | } |
| 474 | } |
| 475 | |
| 476 | /// Execute the check for next_if_normal(). |
| 477 | fn next_is_normal(&self) -> bool { |
| 478 | assert!(!self.has_pending()); |
| 479 | let arg = match self.source.as_slice().first() { |
| 480 | // There has to be a next argument. |
| 481 | None => return false, |
| 482 | Some(arg) => arg, |
| 483 | }; |
| 484 | if let State::FinishedOpts = self.state { |
| 485 | // If we already found a -- then we're really not supposed to be here, |
| 486 | // but we shouldn't treat the next argument as an option. |
| 487 | return true; |
| 488 | } |
| 489 | if arg == "-" { |
| 490 | // "-" is the one argument with a leading '-' that's allowed. |
| 491 | return true; |
| 492 | } |
| 493 | #[cfg (any(unix, target_os = "wasi" ))] |
| 494 | let lead_dash = arg.as_bytes().first() == Some(&b'-' ); |
| 495 | #[cfg (windows)] |
| 496 | let lead_dash = arg.encode_wide().next() == Some(b'-' as u16); |
| 497 | #[cfg (not(any(unix, target_os = "wasi" , windows)))] |
| 498 | let lead_dash = arg.to_string_lossy().as_bytes().first() == Some(&b'-' ); |
| 499 | |
| 500 | !lead_dash |
| 501 | } |
| 502 | |
| 503 | /// Take raw arguments from the original command line. |
| 504 | /// |
| 505 | /// This returns an iterator of [`OsString`]s. Any arguments that are not |
| 506 | /// consumed are kept, so you can continue parsing after you're done with |
| 507 | /// the iterator. |
| 508 | /// |
| 509 | /// To inspect an argument without consuming it, use [`RawArgs::peek`] or |
| 510 | /// [`RawArgs::as_slice`]. |
| 511 | /// |
| 512 | /// # Errors |
| 513 | /// |
| 514 | /// Returns an [`Error::UnexpectedValue`] if the last option had a left-over |
| 515 | /// argument, as in `--option=value`, `-ovalue`, or if it was midway through |
| 516 | /// an option chain, as in `-abc`. The iterator only yields whole arguments. |
| 517 | /// To avoid this, use [`try_raw_args()`][Parser::try_raw_args]. |
| 518 | /// |
| 519 | /// After this error the method is guaranteed to succeed, as it consumes the |
| 520 | /// rest of the argument. |
| 521 | /// |
| 522 | /// # Example |
| 523 | /// As soon as a free-standing argument is found, consume the other arguments |
| 524 | /// as-is, and build them into a command. |
| 525 | /// ``` |
| 526 | /// # fn main() -> Result<(), lexopt::Error> { |
| 527 | /// # use lexopt::prelude::*; |
| 528 | /// # use std::ffi::OsString; |
| 529 | /// # use std::path::PathBuf; |
| 530 | /// # let mut parser = lexopt::Parser::from_args(&["-x" , "echo" , "-n" , "'Hello, world'" ]); |
| 531 | /// # while let Some(arg) = parser.next()? { |
| 532 | /// # match arg { |
| 533 | /// Value(prog) => { |
| 534 | /// let args: Vec<_> = parser.raw_args()?.collect(); |
| 535 | /// let command = std::process::Command::new(prog).args(args); |
| 536 | /// } |
| 537 | /// # _ => (), }} Ok(()) } |
| 538 | pub fn raw_args(&mut self) -> Result<RawArgs<'_>, Error> { |
| 539 | if let Some(value) = self.optional_value() { |
| 540 | return Err(Error::UnexpectedValue { |
| 541 | option: self.format_last_option().unwrap(), |
| 542 | value, |
| 543 | }); |
| 544 | } |
| 545 | |
| 546 | Ok(RawArgs(&mut self.source)) |
| 547 | } |
| 548 | |
| 549 | /// Take raw arguments from the original command line, *if* the current argument |
| 550 | /// has finished processing. |
| 551 | /// |
| 552 | /// Unlike [`raw_args()`][Parser::raw_args] this does not consume any value |
| 553 | /// in case of a left-over argument. This makes it safe to call at any time. |
| 554 | /// |
| 555 | /// It returns `None` exactly when [`optional_value()`][Parser::optional_value] |
| 556 | /// would return `Some`. |
| 557 | /// |
| 558 | /// Note: If no arguments are left then it returns an empty iterator (not `None`). |
| 559 | /// |
| 560 | /// # Example |
| 561 | /// Process arguments of the form `-123` as numbers. For a complete runnable version of |
| 562 | /// this example, see |
| 563 | /// [`examples/nonstandard.rs`](https://github.com/blyxxyz/lexopt/blob/e3754e6f24506afb42394602fc257b1ad9258d84/examples/nonstandard.rs). |
| 564 | /// ``` |
| 565 | /// # fn main() -> Result<(), lexopt::Error> { |
| 566 | /// # use lexopt::prelude::*; |
| 567 | /// # use std::ffi::OsString; |
| 568 | /// # use std::path::PathBuf; |
| 569 | /// # let mut parser = lexopt::Parser::from_iter(&["-13" ]); |
| 570 | /// fn parse_dashnum(parser: &mut lexopt::Parser) -> Option<u64> { |
| 571 | /// let mut raw = parser.try_raw_args()?; |
| 572 | /// let arg = raw.peek()?.to_str()?; |
| 573 | /// let num = arg.strip_prefix('-' )?.parse::<u64>().ok()?; |
| 574 | /// raw.next(); // Consume the argument we just parsed |
| 575 | /// Some(num) |
| 576 | /// } |
| 577 | /// |
| 578 | /// loop { |
| 579 | /// if let Some(num) = parse_dashnum(&mut parser) { |
| 580 | /// println!("Got number {}" , num); |
| 581 | /// } else if let Some(arg) = parser.next()? { |
| 582 | /// match arg { |
| 583 | /// // ... |
| 584 | /// # _ => (), |
| 585 | /// } |
| 586 | /// } else { |
| 587 | /// break; |
| 588 | /// } |
| 589 | /// } |
| 590 | /// # Ok(()) } |
| 591 | /// ``` |
| 592 | pub fn try_raw_args(&mut self) -> Option<RawArgs<'_>> { |
| 593 | if self.has_pending() { |
| 594 | None |
| 595 | } else { |
| 596 | Some(RawArgs(&mut self.source)) |
| 597 | } |
| 598 | } |
| 599 | |
| 600 | /// Check whether we're halfway through an argument, or in other words, |
| 601 | /// if [`Parser::optional_value()`] would return `Some`. |
| 602 | fn has_pending(&self) -> bool { |
| 603 | match self.state { |
| 604 | State::None | State::FinishedOpts => false, |
| 605 | State::PendingValue(_) => true, |
| 606 | State::Shorts(ref arg, pos) => pos < arg.len(), |
| 607 | #[cfg (windows)] |
| 608 | State::ShortsU16(ref arg, pos) => pos < arg.len(), |
| 609 | } |
| 610 | } |
| 611 | |
| 612 | #[inline (never)] |
| 613 | fn format_last_option(&self) -> Option<String> { |
| 614 | match self.last_option { |
| 615 | LastOption::None => None, |
| 616 | LastOption::Short(ch) => Some(format!("- {}" , ch)), |
| 617 | LastOption::Long(ref option) => Some(option.clone()), |
| 618 | } |
| 619 | } |
| 620 | |
| 621 | /// The name of the command, as in the zeroth argument of the process. |
| 622 | /// |
| 623 | /// This is intended for use in messages. If the name is not valid unicode |
| 624 | /// it will be sanitized with replacement characters as by |
| 625 | /// [`String::from_utf8_lossy`]. |
| 626 | /// |
| 627 | /// To get the current executable, use [`std::env::current_exe`]. |
| 628 | /// |
| 629 | /// # Example |
| 630 | /// ``` |
| 631 | /// let mut parser = lexopt::Parser::from_env(); |
| 632 | /// let bin_name = parser.bin_name().unwrap_or("myapp" ); |
| 633 | /// println!("{}: Some message" , bin_name); |
| 634 | /// ``` |
| 635 | pub fn bin_name(&self) -> Option<&str> { |
| 636 | Some(self.bin_name.as_ref()?) |
| 637 | } |
| 638 | |
| 639 | /// Get a value only if it's concatenated to an option, as in `-ovalue` or |
| 640 | /// `--option=value` or `-o=value`, but not `-o value` or `--option value`. |
| 641 | pub fn optional_value(&mut self) -> Option<OsString> { |
| 642 | Some(self.raw_optional_value()?.0) |
| 643 | } |
| 644 | |
| 645 | /// [`Parser::optional_value`], but indicate whether the value was joined |
| 646 | /// with an = sign. This matters for [`Parser::values`]. |
| 647 | fn raw_optional_value(&mut self) -> Option<(OsString, bool)> { |
| 648 | match replace(&mut self.state, State::None) { |
| 649 | State::PendingValue(value) => Some((value, true)), |
| 650 | State::Shorts(mut arg, mut pos) => { |
| 651 | if pos >= arg.len() { |
| 652 | return None; |
| 653 | } |
| 654 | let mut had_eq_sign = false; |
| 655 | if arg[pos] == b'=' { |
| 656 | // -o=value. |
| 657 | // clap actually strips out all leading '='s, but that seems silly. |
| 658 | // We allow `-xo=value`. Python's argparse doesn't strip the = in that case. |
| 659 | pos += 1; |
| 660 | had_eq_sign = true; |
| 661 | } |
| 662 | arg.drain(..pos); // Reuse allocation |
| 663 | #[cfg (any(unix, target_os = "wasi" ))] |
| 664 | { |
| 665 | Some((OsString::from_vec(arg), had_eq_sign)) |
| 666 | } |
| 667 | #[cfg (not(any(unix, target_os = "wasi" )))] |
| 668 | { |
| 669 | let arg = String::from_utf8(arg) |
| 670 | .expect("short option args on exotic platforms must be unicode" ); |
| 671 | Some((arg.into(), had_eq_sign)) |
| 672 | } |
| 673 | } |
| 674 | #[cfg (windows)] |
| 675 | State::ShortsU16(arg, mut pos) => { |
| 676 | if pos >= arg.len() { |
| 677 | return None; |
| 678 | } |
| 679 | let mut had_eq_sign = false; |
| 680 | if arg[pos] == b'=' as u16 { |
| 681 | pos += 1; |
| 682 | had_eq_sign = true; |
| 683 | } |
| 684 | Some((OsString::from_wide(&arg[pos..]), had_eq_sign)) |
| 685 | } |
| 686 | State::FinishedOpts => { |
| 687 | // Not really supposed to be here, but it's benign and not our fault |
| 688 | self.state = State::FinishedOpts; |
| 689 | None |
| 690 | } |
| 691 | State::None => None, |
| 692 | } |
| 693 | } |
| 694 | |
| 695 | fn new(bin_name: Option<OsString>, source: InnerIter) -> Parser { |
| 696 | Parser { |
| 697 | source, |
| 698 | state: State::None, |
| 699 | last_option: LastOption::None, |
| 700 | bin_name: bin_name.map(|s| match s.into_string() { |
| 701 | Ok(text) => text, |
| 702 | Err(text) => text.to_string_lossy().into_owned(), |
| 703 | }), |
| 704 | } |
| 705 | } |
| 706 | |
| 707 | /// Create a parser from the environment using [`std::env::args_os`]. |
| 708 | /// |
| 709 | /// This is the usual way to create a `Parser`. |
| 710 | pub fn from_env() -> Parser { |
| 711 | let mut source = make_iter(std::env::args_os()); |
| 712 | Parser::new(source.next(), source) |
| 713 | } |
| 714 | |
| 715 | // The collision with `FromIterator::from_iter` is a bit unfortunate. |
| 716 | // This name is used because: |
| 717 | // - `from_args()` was taken, and changing its behavior without changing |
| 718 | // its signature would be evil. |
| 719 | // - structopt also had a method by that name, so there's a precedent. |
| 720 | // (clap_derive doesn't.) |
| 721 | // - I couldn't think of a better one. |
| 722 | // When this name was chosen `FromIterator` could not actually be implemented. |
| 723 | // It can be implemented now, but I'm not sure there's a reason to. |
| 724 | |
| 725 | /// Create a parser from an iterator. This is useful for testing among other things. |
| 726 | /// |
| 727 | /// The first item from the iterator **must** be the binary name, as from [`std::env::args_os`]. |
| 728 | /// |
| 729 | /// The iterator is consumed immediately. |
| 730 | /// |
| 731 | /// # Example |
| 732 | /// ``` |
| 733 | /// let mut parser = lexopt::Parser::from_iter(&["myapp" , "-n" , "10" , "./foo.bar" ]); |
| 734 | /// ``` |
| 735 | pub fn from_iter<I>(args: I) -> Parser |
| 736 | where |
| 737 | I: IntoIterator, |
| 738 | I::Item: Into<OsString>, |
| 739 | { |
| 740 | let mut args = make_iter(args.into_iter().map(Into::into)); |
| 741 | Parser::new(args.next(), args) |
| 742 | } |
| 743 | |
| 744 | /// Create a parser from an iterator that does **not** include the binary name. |
| 745 | /// |
| 746 | /// The iterator is consumed immediately. |
| 747 | /// |
| 748 | /// [`bin_name()`](`Parser::bin_name`) will return `None`. Consider using |
| 749 | /// [`Parser::from_iter`] instead. |
| 750 | pub fn from_args<I>(args: I) -> Parser |
| 751 | where |
| 752 | I: IntoIterator, |
| 753 | I::Item: Into<OsString>, |
| 754 | { |
| 755 | Parser::new(None, make_iter(args.into_iter().map(Into::into))) |
| 756 | } |
| 757 | |
| 758 | /// Store a long option so the caller can borrow it. |
| 759 | fn set_long(&mut self, option: String) -> Arg<'_> { |
| 760 | self.last_option = LastOption::Long(option); |
| 761 | match self.last_option { |
| 762 | LastOption::Long(ref option) => Arg::Long(&option[2..]), |
| 763 | _ => unreachable!(), |
| 764 | } |
| 765 | } |
| 766 | } |
| 767 | |
| 768 | impl Arg<'_> { |
| 769 | /// Convert an unexpected argument into an error. |
| 770 | pub fn unexpected(self) -> Error { |
| 771 | match self { |
| 772 | Arg::Short(short: char) => Error::UnexpectedOption(format!("- {}" , short)), |
| 773 | Arg::Long(long: &str) => Error::UnexpectedOption(format!("-- {}" , long)), |
| 774 | Arg::Value(value: OsString) => Error::UnexpectedArgument(value), |
| 775 | } |
| 776 | } |
| 777 | } |
| 778 | |
/// An iterator for multiple option-arguments, returned by [`Parser::values`].
///
/// It's guaranteed to yield at least one value.
#[derive(Debug)]
pub struct ValuesIter<'a> {
    /// Whether the first value has been yielded already.
    took_first: bool,
    /// The parser to pull values from; set to `None` once the iterator is
    /// cut short by a value that was joined to its option with an `=` sign.
    parser: Option<&'a mut Parser>,
}
| 787 | |
| 788 | impl Iterator for ValuesIter<'_> { |
| 789 | type Item = OsString; |
| 790 | |
| 791 | fn next(&mut self) -> Option<Self::Item> { |
| 792 | let parser: &mut &mut Parser = self.parser.as_mut()?; |
| 793 | if self.took_first { |
| 794 | parser.next_if_normal() |
| 795 | } else if let Some((value: OsString, had_eq_sign: bool)) = parser.raw_optional_value() { |
| 796 | if had_eq_sign { |
| 797 | self.parser = None; |
| 798 | } |
| 799 | self.took_first = true; |
| 800 | Some(value) |
| 801 | } else { |
| 802 | let value: OsString = parser |
| 803 | .next_if_normal() |
| 804 | .expect(msg:"ValuesIter must yield at least one value" ); |
| 805 | self.took_first = true; |
| 806 | Some(value) |
| 807 | } |
| 808 | } |
| 809 | } |
| 810 | |
/// An iterator for the remaining raw arguments, returned by [`Parser::raw_args`].
//
// Holds a mutable borrow of an `InnerIter`, so arguments consumed through
// this wrapper are removed from that underlying iterator.
#[derive (Debug)]
pub struct RawArgs<'a>(&'a mut InnerIter);
| 814 | |
| 815 | impl Iterator for RawArgs<'_> { |
| 816 | type Item = OsString; |
| 817 | |
| 818 | fn next(&mut self) -> Option<Self::Item> { |
| 819 | self.0.next() |
| 820 | } |
| 821 | } |
| 822 | |
| 823 | impl RawArgs<'_> { |
| 824 | /// Return a reference to the next() value without consuming it. |
| 825 | /// |
| 826 | /// An argument you peek but do not consume will still be seen by `Parser` |
| 827 | /// if you resume parsing. |
| 828 | /// |
| 829 | /// See [`Iterator::peekable`], [`std::iter::Peekable::peek`]. |
| 830 | pub fn peek(&self) -> Option<&OsStr> { |
| 831 | Some(self.0.as_slice().first()?.as_os_str()) |
| 832 | } |
| 833 | |
| 834 | /// Consume and return the next argument if a condition is true. |
| 835 | /// |
| 836 | /// See [`std::iter::Peekable::next_if`]. |
| 837 | pub fn next_if(&mut self, func: impl FnOnce(&OsStr) -> bool) -> Option<OsString> { |
| 838 | match self.peek() { |
| 839 | Some(arg) if func(arg) => self.next(), |
| 840 | _ => None, |
| 841 | } |
| 842 | } |
| 843 | |
| 844 | /// Return the remaining arguments as a slice. |
| 845 | pub fn as_slice(&self) -> &[OsString] { |
| 846 | self.0.as_slice() |
| 847 | } |
| 848 | } |
| 849 | |
| 850 | // These would make sense: |
| 851 | // - fn RawArgs::iter(&self) |
| 852 | // - impl IntoIterator for &RawArgs |
| 853 | // - impl AsRef<[OsString]> for RawArgs |
| 854 | // But they're niche and constrain future design. |
| 855 | // Let's leave them out for now. |
| 856 | // (Open question: should iter() return std::slice::Iter<OsString> and get |
| 857 | // an optimized .nth() and so on for free, or should it return a novel type |
| 858 | // that yields &OsStr?) |
| 859 | |
/// An error during argument parsing.
///
/// This implements `From<String>` and `From<&str>`, for easy ad-hoc error
/// messages.
///
/// The `Debug` implementation defers to `Display`, so an error returned from
/// `main()` is printed as a readable message.
//
// This is not #[non_exhaustive] because of the MSRV. I'm hoping no more
// variants will turn out to be needed: this seems reasonable, if the scope
// of the library doesn't change. Worst case scenario it can be stuffed inside
// Error::Custom.
pub enum Error {
    /// An option argument was expected but was not found.
    MissingValue {
        /// The most recently emitted option, or `None` if there was none.
        option: Option<String>,
    },

    /// An unexpected option was found.
    ///
    /// Holds the option as text.
    UnexpectedOption(String),

    /// A positional argument was found when none was expected.
    UnexpectedArgument(OsString),

    /// An option had a value when none was expected.
    UnexpectedValue {
        /// The option.
        option: String,
        /// The value.
        value: OsString,
    },

    /// Parsing a value failed. Returned by methods on [`ValueExt`].
    ParsingFailed {
        /// The string that failed to parse.
        value: String,
        /// The error returned while parsing.
        error: Box<dyn std::error::Error + Send + Sync + 'static>,
    },

    /// A value was found that was not valid unicode.
    ///
    /// This can be returned by the methods on [`ValueExt`].
    NonUnicodeValue(OsString),

    /// For custom error messages in application code.
    Custom(Box<dyn std::error::Error + Send + Sync + 'static>),
}
| 906 | |
| 907 | impl Display for Error { |
| 908 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 909 | use crate::Error::*; |
| 910 | match self { |
| 911 | MissingValue { option: None } => write!(f, "missing argument" ), |
| 912 | MissingValue { |
| 913 | option: Some(option), |
| 914 | } => { |
| 915 | write!(f, "missing argument for option ' {}'" , option) |
| 916 | } |
| 917 | UnexpectedOption(option) => write!(f, "invalid option ' {}'" , option), |
| 918 | UnexpectedArgument(value) => write!(f, "unexpected argument {:?}" , value), |
| 919 | UnexpectedValue { option, value } => { |
| 920 | write!( |
| 921 | f, |
| 922 | "unexpected argument for option ' {}': {:?}" , |
| 923 | option, value |
| 924 | ) |
| 925 | } |
| 926 | NonUnicodeValue(value) => write!(f, "argument is invalid unicode: {:?}" , value), |
| 927 | ParsingFailed { value, error } => { |
| 928 | write!(f, "cannot parse argument {:?}: {}" , value, error) |
| 929 | } |
| 930 | Custom(err) => write!(f, " {}" , err), |
| 931 | } |
| 932 | } |
| 933 | } |
| 934 | |
| 935 | // This is printed when returning an error from main(), so defer to Display |
| 936 | impl std::fmt::Debug for Error { |
| 937 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 938 | Display::fmt(self, f) |
| 939 | } |
| 940 | } |
| 941 | |
| 942 | impl std::error::Error for Error { |
| 943 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
| 944 | match self { |
| 945 | Error::ParsingFailed { error: &Box, .. } | Error::Custom(error: &Box) => Some(error.as_ref()), |
| 946 | _ => None, |
| 947 | } |
| 948 | } |
| 949 | } |
| 950 | |
| 951 | impl From<String> for Error { |
| 952 | fn from(msg: String) -> Self { |
| 953 | Error::Custom(msg.into()) |
| 954 | } |
| 955 | } |
| 956 | |
| 957 | impl<'a> From<&'a str> for Error { |
| 958 | fn from(msg: &'a str) -> Self { |
| 959 | Error::Custom(msg.into()) |
| 960 | } |
| 961 | } |
| 962 | |
| 963 | /// For [`OsString::into_string`], so it may be used with the try (`?`) operator. |
| 964 | /// |
| 965 | /// [`ValueExt::string`] is the new preferred method because it's compatible with |
| 966 | /// catch-all error types like `anyhow::Error`. |
| 967 | impl From<OsString> for Error { |
| 968 | fn from(arg: OsString) -> Self { |
| 969 | Error::NonUnicodeValue(arg) |
| 970 | } |
| 971 | } |
| 972 | |
// Private module holding the supertrait of `ValueExt`: code outside this file
// cannot name `Sealed`, so it cannot add implementations of its own.
mod private {
    use std::ffi::OsString;

    /// Marker trait, implemented only for `OsString`.
    pub trait Sealed {}

    impl Sealed for OsString {}
}
| 977 | |
/// An optional extension trait with methods for parsing [`OsString`]s.
///
/// They may fail in two cases:
/// - The value cannot be decoded because it's invalid unicode
///   ([`Error::NonUnicodeValue`])
/// - The value can be decoded, but parsing fails ([`Error::ParsingFailed`])
///
/// If parsing fails the error will be wrapped in lexopt's own [`Error`] type.
///
/// The trait has a private supertrait, so it cannot be implemented outside
/// this crate.
pub trait ValueExt: private::Sealed {
    /// Decode the value and parse it using [`FromStr`].
    ///
    /// This will fail if the value is not valid unicode or if the subsequent
    /// parsing fails.
    ///
    /// # Errors
    ///
    /// [`Error::NonUnicodeValue`] if decoding fails, [`Error::ParsingFailed`]
    /// if `T::from_str` rejects the decoded text.
    fn parse<T: FromStr>(&self) -> Result<T, Error>
    where
        T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;

    /// Decode the value and parse it using a custom function.
    ///
    /// # Errors
    ///
    /// [`Error::NonUnicodeValue`] if decoding fails, [`Error::ParsingFailed`]
    /// (wrapping the error returned by `func`) if `func` fails.
    fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error>
    where
        F: FnOnce(&str) -> Result<T, E>,
        E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;

    // There is no parse_os_with() because I can't think of any useful
    // fallible operations on an OsString. Typically you'd either decode it,
    // use it as is, or do an infallible conversion to a PathBuf or such.
    //
    // If you have a use for parse_os_with() please open an issue with an
    // example.

    /// Convert the `OsString` into a [`String`] if it's valid Unicode.
    ///
    /// This is like [`OsString::into_string`] but returns an
    /// [`Error::NonUnicodeValue`] on error instead of the original `OsString`.
    /// This makes it easier to propagate the failure with libraries like
    /// `anyhow`.
    fn string(self) -> Result<String, Error>;
}
| 1016 | |
| 1017 | impl ValueExt for OsString { |
| 1018 | fn parse<T: FromStr>(&self) -> Result<T, Error> |
| 1019 | where |
| 1020 | T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>, |
| 1021 | { |
| 1022 | self.parse_with(FromStr::from_str) |
| 1023 | } |
| 1024 | |
| 1025 | fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error> |
| 1026 | where |
| 1027 | F: FnOnce(&str) -> Result<T, E>, |
| 1028 | E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>, |
| 1029 | { |
| 1030 | match self.to_str() { |
| 1031 | Some(text) => match func(text) { |
| 1032 | Ok(value) => Ok(value), |
| 1033 | Err(err) => Err(Error::ParsingFailed { |
| 1034 | value: text.to_owned(), |
| 1035 | error: err.into(), |
| 1036 | }), |
| 1037 | }, |
| 1038 | None => Err(Error::NonUnicodeValue(self.into())), |
| 1039 | } |
| 1040 | } |
| 1041 | |
| 1042 | fn string(self) -> Result<String, Error> { |
| 1043 | match self.into_string() { |
| 1044 | Ok(string) => Ok(string), |
| 1045 | Err(raw) => Err(Error::NonUnicodeValue(raw)), |
| 1046 | } |
| 1047 | } |
| 1048 | } |
| 1049 | |
| 1050 | /// A small prelude for processing arguments. |
| 1051 | /// |
| 1052 | /// It allows you to write `Short`/`Long`/`Value` without an [`Arg`] prefix |
| 1053 | /// and adds convenience methods to [`OsString`]. |
| 1054 | /// |
| 1055 | /// If this is used it's best to import it inside a function, not in module |
| 1056 | /// scope: |
| 1057 | /// ``` |
| 1058 | /// # struct Args; |
| 1059 | /// fn parse_args() -> Result<Args, lexopt::Error> { |
| 1060 | /// use lexopt::prelude::*; |
| 1061 | /// // ... |
| 1062 | /// # Ok(Args) |
| 1063 | /// } |
| 1064 | /// ``` |
| 1065 | pub mod prelude { |
| 1066 | pub use super::Arg::*; |
| 1067 | pub use super::ValueExt; |
| 1068 | } |
| 1069 | |
/// Take the first codepoint of a bytestring. On error, return the first
/// (and therefore in some way invalid) byte/code unit.
///
/// The rest of the bytestring does not have to be valid unicode.
///
/// Returns `Ok(None)` for an empty bytestring.
fn first_codepoint(bytes: &[u8]) -> Result<Option<char>, u8> {
    // We only need the first 4 bytes
    let bytes = bytes.get(..4).unwrap_or(bytes);
    let text = match std::str::from_utf8(bytes) {
        Ok(text) => text,
        // Something further on is invalid (or was cut off by the truncation
        // above), but the prefix up to `valid_up_to()` is known-good UTF-8.
        Err(err) if err.valid_up_to() > 0 => {
            std::str::from_utf8(&bytes[..err.valid_up_to()]).unwrap()
        }
        // Nothing decodes at all. An empty slice is valid UTF-8, so `bytes`
        // has at least one element here.
        Err(_) => return Err(bytes[0]),
    };
    Ok(text.chars().next())
}
| 1086 | |
/// UTF-16 counterpart of `first_codepoint`: decode the first codepoint of
/// `units`, or return the first code unit if it cannot be decoded.
///
/// Returns `Ok(None)` when `units` is empty.
#[cfg(windows)]
fn first_utf16_codepoint(units: &[u16]) -> Result<Option<char>, u16> {
    let first = std::char::decode_utf16(units.iter().cloned()).next();
    match first {
        None => Ok(None),
        Some(Ok(ch)) => Ok(Some(ch)),
        Some(Err(_)) => Err(units[0]),
    }
}
| 1096 | |
| 1097 | #[cfg (test)] |
| 1098 | mod tests { |
| 1099 | use super::prelude::*; |
| 1100 | use super::*; |
| 1101 | |
| 1102 | fn parse(args: &'static str) -> Parser { |
| 1103 | Parser::from_args(args.split_whitespace().map(bad_string)) |
| 1104 | } |
| 1105 | |
/// Specialized backport of matches!()
///
/// Panics unless `$expression` matches one of the `|`-separated patterns.
/// The panic message shows the stringified source of the expression and of
/// the patterns, not the runtime value.
macro_rules! assert_matches {
    ($expression: expr, $( $pattern: pat )|+) => {
        match $expression {
            $( $pattern )|+ => (),
            _ => panic!(
                "{:?} does not match {:?}" ,
                stringify!($expression),
                stringify!($( $pattern )|+)
            ),
        }
    };
}
| 1119 | |
/// A short option with a separate value, positionals, a lone `-`, and `--`
/// ending option parsing.
#[test ]
fn test_basic() -> Result<(), Error> {
    let mut p = parse("-n 10 foo - -- baz -qux" );
    assert_eq!(p.next()?.unwrap(), Short('n' ));
    assert_eq!(p.value()?.parse::<i32>()?, 10);
    assert_eq!(p.next()?.unwrap(), Value("foo" .into()));
    // A lone "-" comes out as a value.
    assert_eq!(p.next()?.unwrap(), Value("-" .into()));
    // "--" itself is not emitted; what follows is treated as values.
    assert_eq!(p.next()?.unwrap(), Value("baz" .into()));
    assert_eq!(p.next()?.unwrap(), Value("-qux" .into()));
    // Calling next() past the end keeps returning None.
    assert_eq!(p.next()?, None);
    assert_eq!(p.next()?, None);
    assert_eq!(p.next()?, None);
    Ok(())
}
| 1134 | |
/// Combined short options, with and without an attached value.
#[test ]
fn test_combined() -> Result<(), Error> {
    let mut p = parse("-abc -fvalue -xfvalue" );
    // "-abc" is emitted as three separate short options.
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    assert_eq!(p.next()?.unwrap(), Short('b' ));
    assert_eq!(p.next()?.unwrap(), Short('c' ));
    // The rest of the argument after "-f" can be taken as its value.
    assert_eq!(p.next()?.unwrap(), Short('f' ));
    assert_eq!(p.value()?, "value" );
    // The same works after another option in the same argument.
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    assert_eq!(p.next()?.unwrap(), Short('f' ));
    assert_eq!(p.value()?, "value" );
    assert_eq!(p.next()?, None);
    Ok(())
}
| 1149 | |
/// Long options with and without `=value`, including an unclaimed value.
#[test ]
fn test_long() -> Result<(), Error> {
    let mut p = parse("--foo --bar=qux --foobar=qux=baz" );
    assert_eq!(p.next()?.unwrap(), Long("foo" ));
    assert_eq!(p.next()?.unwrap(), Long("bar" ));
    assert_eq!(p.value()?, "qux" );
    assert_eq!(p.next()?.unwrap(), Long("foobar" ));
    // The value of --foobar was not taken, so the next call to next() fails.
    // Only the first "=" splits option and value.
    match p.next().unwrap_err() {
        Error::UnexpectedValue { option, value } => {
            assert_eq!(option, "--foobar" );
            assert_eq!(value, "qux=baz" );
        }
        _ => panic!(),
    }
    // Parsing continues normally after the error.
    assert_eq!(p.next()?, None);
    Ok(())
}
| 1167 | |
/// Handling of `--`, `-`, and a dash inside a group of short options.
#[test ]
fn test_dash_args() -> Result<(), Error> {
    // "--" should indicate the end of the options
    let mut p = parse("-x -- -y" );
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    assert_eq!(p.next()?.unwrap(), Value("-y" .into()));
    assert_eq!(p.next()?, None);

    // ...unless it's an argument of an option
    let mut p = parse("-x -- -y" );
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    assert_eq!(p.value()?, "--" );
    assert_eq!(p.next()?.unwrap(), Short('y' ));
    assert_eq!(p.next()?, None);

    // "-" is a valid value that should not be treated as an option
    let mut p = parse("-x - -y" );
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    assert_eq!(p.next()?.unwrap(), Value("-" .into()));
    assert_eq!(p.next()?.unwrap(), Short('y' ));
    assert_eq!(p.next()?, None);

    // '-' is a silly and hard to use short option, but other parsers treat
    // it like an option in this position
    let mut p = parse("-x-y" );
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    assert_eq!(p.next()?.unwrap(), Short('-' ));
    assert_eq!(p.next()?.unwrap(), Short('y' ));
    assert_eq!(p.next()?, None);

    Ok(())
}
| 1200 | |
/// `value()` reports `MissingValue` together with the option that wanted it.
#[test ]
fn test_missing_value() -> Result<(), Error> {
    // Short option at the end of the arguments.
    let mut p = parse("-o" );
    assert_eq!(p.next()?.unwrap(), Short('o' ));
    match p.value() {
        Err(Error::MissingValue {
            option: Some(option),
        }) => assert_eq!(option, "-o" ),
        _ => panic!(),
    }

    // Long option at the end of the arguments.
    let mut q = parse("--out" );
    assert_eq!(q.next()?.unwrap(), Long("out" ));
    match q.value() {
        Err(Error::MissingValue {
            option: Some(option),
        }) => assert_eq!(option, "--out" ),
        _ => panic!(),
    }

    // No option was parsed at all, so there is none to report.
    let mut r = parse("" );
    assert_matches!(r.value(), Err(Error::MissingValue { option: None }));

    Ok(())
}
| 1226 | |
/// Empty strings, empty long option names and repeated `--`.
#[test ]
fn test_weird_args() -> Result<(), Error> {
    let mut p = Parser::from_args(&[
        "" , "--=" , "--=3" , "-" , "-x" , "--" , "-" , "-x" , "--" , "" , "-" , "-x" ,
    ]);
    // An empty argument is an (empty) value.
    assert_eq!(p.next()?.unwrap(), Value(OsString::from("" )));

    // These are weird and questionable, but this seems to be the standard
    // interpretation
    // GNU getopt_long and argparse complain that it could be an abbreviation
    // of every single long option
    // clap complains that "--" is not expected, which matches its treatment
    // of unknown long options
    assert_eq!(p.next()?.unwrap(), Long("" ));
    assert_eq!(p.value()?, OsString::from("" ));
    assert_eq!(p.next()?.unwrap(), Long("" ));
    assert_eq!(p.value()?, OsString::from("3" ));

    assert_eq!(p.next()?.unwrap(), Value(OsString::from("-" )));
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    // The first "--" is taken as the value of -x, so options are still parsed.
    assert_eq!(p.value()?, OsString::from("--" ));
    assert_eq!(p.next()?.unwrap(), Value(OsString::from("-" )));
    assert_eq!(p.next()?.unwrap(), Short('x' ));
    // The second "--" is consumed by next(); the rest are plain values.
    assert_eq!(p.next()?.unwrap(), Value(OsString::from("" )));
    assert_eq!(p.next()?.unwrap(), Value(OsString::from("-" )));
    assert_eq!(p.next()?.unwrap(), Value(OsString::from("-x" )));
    assert_eq!(p.next()?, None);

    // Invalid unicode as the value of an empty long option.
    #[cfg (any(unix, target_os = "wasi" , windows))]
    {
        let mut q = parse("--=@" );
        assert_eq!(q.next()?.unwrap(), Long("" ));
        assert_eq!(q.value()?, bad_string("@" ));
        assert_eq!(q.next()?, None);
    }

    // No arguments at all.
    let mut r = parse("" );
    assert_eq!(r.next()?, None);

    Ok(())
}
| 1268 | |
/// Non-ASCII characters work as short options, long options and values.
#[test ]
fn test_unicode() -> Result<(), Error> {
    let mut p = parse("-aµ --µ=10 µ --foo=µ" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    assert_eq!(p.next()?.unwrap(), Short('µ' ));
    assert_eq!(p.next()?.unwrap(), Long("µ" ));
    assert_eq!(p.value()?, "10" );
    assert_eq!(p.next()?.unwrap(), Value("µ" .into()));
    assert_eq!(p.next()?.unwrap(), Long("foo" ));
    assert_eq!(p.value()?, "µ" );
    Ok(())
}
| 1281 | |
/// Invalid unicode (written as `@`, see `bad_string`) attached to options.
#[cfg (any(unix, target_os = "wasi" , windows))]
#[test ]
fn test_mixed_invalid() -> Result<(), Error> {
    // As the value of a long option.
    let mut p = parse("--foo=@@@" );
    assert_eq!(p.next()?.unwrap(), Long("foo" ));
    assert_eq!(p.value()?, bad_string("@@@" ));

    // As the attached value of a non-ASCII short option.
    let mut q = parse("-💣@@@" );
    assert_eq!(q.next()?.unwrap(), Short('💣' ));
    assert_eq!(q.value()?, bad_string("@@@" ));

    // Parsed as short options, each invalid unit becomes the replacement
    // character.
    let mut r = parse("-f@@@" );
    assert_eq!(r.next()?.unwrap(), Short('f' ));
    assert_eq!(r.next()?.unwrap(), Short('�' ));
    assert_eq!(r.next()?.unwrap(), Short('�' ));
    assert_eq!(r.next()?.unwrap(), Short('�' ));
    assert_eq!(r.next()?, None);

    // A value that mixes valid text, "=" and invalid unicode.
    let mut s = parse("--foo=bar=@@@" );
    assert_eq!(s.next()?.unwrap(), Long("foo" ));
    assert_eq!(s.value()?, bad_string("bar=@@@" ));

    Ok(())
}
| 1306 | |
/// Invalid unicode as a separate argument is returned unchanged by `value()`.
#[cfg (any(unix, target_os = "wasi" , windows))]
#[test ]
fn test_separate_invalid() -> Result<(), Error> {
    let mut p = parse("--foo @@@" );
    assert_eq!(p.next()?.unwrap(), Long("foo" ));
    assert_eq!(p.value()?, bad_string("@@@" ));
    Ok(())
}
| 1315 | |
/// Invalid unicode in a long option name shows up as the replacement
/// character.
#[cfg (any(unix, target_os = "wasi" , windows))]
#[test ]
fn test_invalid_long_option() -> Result<(), Error> {
    // With a value attached.
    let mut p = parse("--@=10" );
    assert_eq!(p.next()?.unwrap(), Long("�" ));
    assert_eq!(p.value().unwrap(), OsString::from("10" ));
    assert_eq!(p.next()?, None);

    // Without a value.
    let mut q = parse("--@" );
    assert_eq!(q.next()?.unwrap(), Long("�" ));
    assert_eq!(q.next()?, None);

    Ok(())
}
| 1330 | |
/// `=` directly after a short option separates it from its value.
#[test ]
fn short_opt_equals_sign() -> Result<(), Error> {
    // The value can be claimed with value()...
    let mut p = parse("-a=b" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    assert_eq!(p.value()?, OsString::from("b" ));
    assert_eq!(p.next()?, None);

    // ...and is an error if it is not claimed.
    let mut p = parse("-a=b" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    match p.next().unwrap_err() {
        Error::UnexpectedValue { option, value } => {
            assert_eq!(option, "-a" );
            assert_eq!(value, "b" );
        }
        _ => panic!(),
    }
    assert_eq!(p.next()?, None);

    // An empty value after "=" behaves the same way.
    let mut p = parse("-a=" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    assert_eq!(p.value()?, OsString::from("" ));
    assert_eq!(p.next()?, None);

    let mut p = parse("-a=" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    match p.next().unwrap_err() {
        Error::UnexpectedValue { option, value } => {
            assert_eq!(option, "-a" );
            assert_eq!(value, "" );
        }
        _ => panic!(),
    }
    assert_eq!(p.next()?, None);

    // "=" as the first character is itself a short option.
    let mut p = parse("-=" );
    assert_eq!(p.next()?.unwrap(), Short('=' ));
    assert_eq!(p.next()?, None);

    let mut p = parse("-=a" );
    assert_eq!(p.next()?.unwrap(), Short('=' ));
    assert_eq!(p.value()?, "a" );

    Ok(())
}
| 1375 | |
/// Like `short_opt_equals_sign`, but with invalid unicode as the value.
#[cfg (any(unix, target_os = "wasi" , windows))]
#[test ]
fn short_opt_equals_sign_invalid() -> Result<(), Error> {
    let mut p = parse("-a=@" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    assert_eq!(p.value()?, bad_string("@" ));
    assert_eq!(p.next()?, None);

    // The unclaimed invalid value is reported unchanged in the error.
    let mut p = parse("-a=@" );
    assert_eq!(p.next()?.unwrap(), Short('a' ));
    match p.next().unwrap_err() {
        Error::UnexpectedValue { option, value } => {
            assert_eq!(option, "-a" );
            assert_eq!(value, bad_string("@" ));
        }
        _ => panic!(),
    }
    assert_eq!(p.next()?, None);

    let mut p = parse("-=@" );
    assert_eq!(p.next()?.unwrap(), Short('=' ));
    assert_eq!(p.value()?, bad_string("@" ));

    Ok(())
}
| 1401 | |
/// Behaviour of `Parser::values()` for separate, attached and missing values.
#[test ]
fn multi_values() -> Result<(), Error> {
    // All following plain arguments belong to the option.
    for &case in &["-a b c d" , "-ab c d" , "-a b c d --" , "--a b c d" ] {
        let mut p = parse(case);
        p.next()?.unwrap();
        let mut iter = p.values()?;
        let values: Vec<_> = iter.by_ref().collect();
        assert_eq!(values, &["b" , "c" , "d" ]);
        assert!(iter.next().is_none());
        assert!(p.next()?.is_none());
    }

    // A value attached with "=" is the only value; "c" stays a positional.
    for &case in &["-a=b c" , "--a=b c" ] {
        let mut p = parse(case);
        p.next()?.unwrap();
        let mut iter = p.values()?;
        let values: Vec<_> = iter.by_ref().collect();
        assert_eq!(values, &["b" ]);
        assert!(iter.next().is_none());
        assert_eq!(p.next()?.unwrap(), Value("c" .into()));
        assert!(p.next()?.is_none());
    }

    // No value available: values() fails and the parser ends up exhausted.
    for &case in &["-a" , "--a" , "-a -b" , "-a -- b" , "-a --" ] {
        let mut p = parse(case);
        p.next()?.unwrap();
        assert!(p.values().is_err());
        assert!(p.next().is_ok());
        assert!(p.next().unwrap().is_none());
    }

    // An empty value after "=" still counts as a value.
    for &case in &["-a=" , "--a=" ] {
        let mut p = parse(case);
        p.next()?.unwrap();
        let mut iter = p.values()?;
        let values: Vec<_> = iter.by_ref().collect();
        assert_eq!(values, &["" ]);
        assert!(iter.next().is_none());
        assert!(p.next()?.is_none());
    }

    // Test that .values() does not eagerly consume the first value
    for &case in &["-a=b" , "--a=b" , "-a b" ] {
        let mut p = parse(case);
        p.next()?.unwrap();
        assert!(p.values().is_ok());
        assert_eq!(p.value()?, "b" );
    }

    // Dropping the iterator unused leaves "b" to be parsed as an option.
    {
        let mut p = parse("-ab" );
        p.next()?.unwrap();
        assert!(p.values().is_ok());
        assert_eq!(p.next()?.unwrap(), Short('b' ));
    }

    Ok(())
}
| 1460 | |
/// `raw_args()` / `try_raw_args()` and the `RawArgs` helper methods.
#[test ]
fn raw_args() -> Result<(), Error> {
    // Raw access works before parsing and after the arguments run out.
    let mut p = parse("-a b c d" );
    assert!(p.try_raw_args().is_some());
    assert_eq!(p.raw_args()?.collect::<Vec<_>>(), &["-a" , "b" , "c" , "d" ]);
    assert!(p.try_raw_args().is_some());
    assert!(p.next()?.is_none());
    assert!(p.try_raw_args().is_some());
    assert_eq!(p.raw_args()?.as_slice().len(), 0);

    // In the middle of "-ab" raw access is refused. The failed raw_args()
    // call makes the following try_raw_args() succeed.
    let mut p = parse("-ab c d" );
    p.next()?;
    assert!(p.try_raw_args().is_none());
    assert!(p.raw_args().is_err());
    assert_eq!(p.try_raw_args().unwrap().collect::<Vec<_>>(), &["c" , "d" ]);
    assert!(p.next()?.is_none());
    assert_eq!(p.try_raw_args().unwrap().as_slice().len(), 0);

    // Arguments not taken through the raw iterator are parsed as usual.
    let mut p = parse("-a b c d" );
    assert_eq!(p.raw_args()?.take(3).collect::<Vec<_>>(), &["-a" , "b" , "c" ]);
    assert_eq!(p.next()?, Some(Value("d" .into())));
    assert!(p.next()?.is_none());

    // peek() and a rejecting next_if() consume nothing.
    let mut p = parse("a" );
    let mut it = p.raw_args()?;
    assert_eq!(it.peek(), Some("a" .as_ref()));
    assert_eq!(it.next_if(|_| false), None);
    assert_eq!(p.next()?, Some(Value("a" .into())));
    assert!(p.next()?.is_none());

    Ok(())
}
| 1493 | |
/// `bin_name()` for the different constructors.
#[test ]
fn bin_name() {
    // from_iter takes the first item as the binary name...
    assert_eq!(
        Parser::from_iter(&["foo" , "bar" , "baz" ]).bin_name(),
        Some("foo" )
    );
    // ...from_args never records one.
    assert_eq!(Parser::from_args(&["foo" , "bar" , "baz" ]).bin_name(), None);
    assert_eq!(Parser::from_iter(&[] as &[&str]).bin_name(), None);
    assert_eq!(Parser::from_iter(&["" ]).bin_name(), Some("" ));
    assert!(Parser::from_env().bin_name().is_some());
    // Invalid unicode in the name is replaced by the replacement character.
    #[cfg (any(unix, target_os = "wasi" , windows))]
    assert_eq!(
        Parser::from_iter(vec![bad_string("foo@bar" )]).bin_name(),
        Some("foo�bar" )
    );
}
| 1510 | |
/// `ValueExt` methods on a valid-unicode value.
#[test ]
fn test_value_ext() -> Result<(), Error> {
    let s = OsString::from("-10" );
    assert_eq!(s.parse::<i32>()?, -10);
    assert_eq!(
        s.parse_with(|s| match s {
            "-10" => Ok(0),
            _ => Err("bad" ),
        })?,
        0,
    );
    // A failed parse reports the text that could not be parsed.
    match s.parse::<u32>() {
        Err(Error::ParsingFailed { value, .. }) => assert_eq!(value, "-10" ),
        _ => panic!(),
    }
    match s.parse_with(|s| match s {
        "11" => Ok(0_i32),
        _ => Err("bad" ),
    }) {
        Err(Error::ParsingFailed { value, .. }) => assert_eq!(value, "-10" ),
        _ => panic!(),
    }
    assert_eq!(s.string()?, "-10" );
    Ok(())
}
| 1536 | |
/// Every `ValueExt` method reports `NonUnicodeValue` for invalid unicode.
#[cfg (any(unix, target_os = "wasi" , windows))]
#[test ]
fn test_value_ext_invalid() -> Result<(), Error> {
    let s = bad_string("foo@" );
    assert_matches!(s.parse::<i32>(), Err(Error::NonUnicodeValue(_)));
    assert_matches!(
        s.parse_with(<f32 as FromStr>::from_str),
        Err(Error::NonUnicodeValue(_))
    );
    assert_matches!(s.string(), Err(Error::NonUnicodeValue(_)));
    Ok(())
}
| 1549 | |
/// `first_codepoint` on ASCII, empty, multi-byte and invalid input.
#[test]
fn test_first_codepoint() {
    assert_eq!(first_codepoint(b"foo").unwrap(), Some('f'));
    assert_eq!(first_codepoint(b"").unwrap(), None);
    // Invalid bytes after a valid first character do not matter.
    assert_eq!(first_codepoint(b"f\xFF\xFF").unwrap(), Some('f'));
    // A two-byte character at the very start.
    assert_eq!(first_codepoint(b"\xC2\xB5bar").unwrap(), Some('µ'));
    // An invalid first byte is an error.
    first_codepoint(b"\xFF").unwrap_err();
    // A multi-byte character cut in half by the 4-byte window.
    assert_eq!(first_codepoint(b"foo\xC2\xB5").unwrap(), Some('f'));
}
| 1559 | |
/// Transform @ characters into invalid unicode.
///
/// On unix and WASI every `@` byte becomes `0xFF`; on Windows every `@`
/// becomes the unpaired surrogate `0xD800`. All other text is kept as is.
///
/// # Panics
///
/// On any other platform this panics if `text` contains an `@`.
fn bad_string(text: &str) -> OsString {
    #[cfg(any(unix, target_os = "wasi"))]
    {
        let bytes: Vec<u8> = text
            .bytes()
            .map(|byte| if byte == b'@' { b'\xFF' } else { byte })
            .collect();
        OsString::from_vec(bytes)
    }
    #[cfg(windows)]
    {
        let mut out = Vec::new();
        for ch in text.chars() {
            if ch == '@' {
                out.push(0xD800);
            } else {
                let mut buf = [0; 2];
                out.extend(&*ch.encode_utf16(&mut buf));
            }
        }
        OsString::from_wide(&out)
    }
    #[cfg(not(any(unix, target_os = "wasi", windows)))]
    {
        if text.contains('@') {
            unimplemented!("Don't know how to create invalid OsStrings on this platform");
        }
        text.into()
    }
}
| 1593 | |
/// Basic exhaustive testing of short combinations of "interesting"
/// arguments. They should not panic, not hang, and pass some checks.
///
/// The advantage compared to full fuzzing is that it runs on all platforms
/// and together with the other tests. cargo-fuzz doesn't work on Windows
/// and requires a special incantation.
///
/// A disadvantage is that it's still limited by arguments I could think of
/// and only does very short sequences. Another is that it's bad at
/// reporting failure, though the println!() helps.
///
/// This test takes a while to run.
#[test ]
fn basic_fuzz() {
    // "@" is turned into invalid unicode by bad_string(), so those words
    // are only included where bad_string() supports that.
    #[cfg (any(windows, unix, target_os = "wasi" ))]
    const VOCABULARY: &[&str] = &[
        "" , "-" , "--" , "---" , "a" , "-a" , "-aa" , "@" , "-@" , "-a@" , "-@a" , "--a" , "--@" , "--a=a" ,
        "--a=" , "--a=@" , "--@=a" , "--=" , "--=@" , "--=a" , "-@@" , "-a=a" , "-a=" , "-=" , "-a-" ,
    ];
    #[cfg (not(any(windows, unix, target_os = "wasi" )))]
    const VOCABULARY: &[&str] = &[
        "" , "-" , "--" , "---" , "a" , "-a" , "-aa" , "--a" , "--a=a" , "--a=" , "--=" , "--=a" , "-a=a" ,
        "-a=" , "-=" , "-a-" ,
    ];
    // Start with the empty argument list.
    exhaust(Parser::new(None, Vec::new().into_iter()), 0);
    let vocabulary: Vec<OsString> = VOCABULARY.iter().map(|&s| bad_string(s)).collect();
    let mut permutations = vec![vec![]];
    // Each round extends every sequence by every word, so the three rounds
    // cover all sequences of one, two and three words.
    for _ in 0..3 {
        let mut new = Vec::new();
        for old in permutations {
            for word in &vocabulary {
                let mut extended = old.clone();
                extended.push(word);
                new.push(extended);
            }
        }
        permutations = new;
        for permutation in &permutations {
            // Printed so that the failing sequence shows up in the test output.
            println!("{:?}" , permutation);
            let p = Parser::from_args(permutation);
            exhaust(p, 0);
        }
    }
}
| 1638 | |
| 1639 | /// Run many sequences of methods on a Parser. |
| 1640 | fn exhaust(mut parser: Parser, depth: u16) { |
| 1641 | if depth > 100 { |
| 1642 | panic!("Stuck in loop" ); |
| 1643 | } |
| 1644 | |
| 1645 | // has_pending() == optional_value().is_some() |
| 1646 | if parser.has_pending() { |
| 1647 | { |
| 1648 | let mut parser = parser.clone(); |
| 1649 | assert!(parser.try_raw_args().is_none()); |
| 1650 | assert!(parser.try_raw_args().is_none()); |
| 1651 | assert!(parser.raw_args().is_err()); |
| 1652 | // Recovery possible |
| 1653 | assert!(parser.raw_args().is_ok()); |
| 1654 | assert!(parser.try_raw_args().is_some()); |
| 1655 | } |
| 1656 | |
| 1657 | { |
| 1658 | let mut parser = parser.clone(); |
| 1659 | assert!(parser.optional_value().is_some()); |
| 1660 | exhaust(parser, depth + 1); |
| 1661 | } |
| 1662 | } else { |
| 1663 | let prev_state = parser.state.clone(); |
| 1664 | let prev_remaining = parser.source.as_slice().len(); |
| 1665 | assert!(parser.optional_value().is_none()); |
| 1666 | assert!(parser.raw_args().is_ok()); |
| 1667 | assert!(parser.try_raw_args().is_some()); |
| 1668 | // Verify state transitions |
| 1669 | match prev_state { |
| 1670 | State::None | State::PendingValue(_) => { |
| 1671 | assert_matches!(parser.state, State::None); |
| 1672 | } |
| 1673 | State::Shorts(arg, pos) => { |
| 1674 | assert_eq!(pos, arg.len()); |
| 1675 | assert_matches!(parser.state, State::None); |
| 1676 | } |
| 1677 | #[cfg (windows)] |
| 1678 | State::ShortsU16(arg, pos) => { |
| 1679 | assert_eq!(pos, arg.len()); |
| 1680 | assert_matches!(parser.state, State::None); |
| 1681 | } |
| 1682 | State::FinishedOpts => assert_matches!(parser.state, State::FinishedOpts), |
| 1683 | } |
| 1684 | // No arguments were consumed |
| 1685 | assert_eq!(parser.source.as_slice().len(), prev_remaining); |
| 1686 | } |
| 1687 | |
| 1688 | { |
| 1689 | let mut parser = parser.clone(); |
| 1690 | match parser.next() { |
| 1691 | Ok(None) => { |
| 1692 | assert_matches!(parser.state, State::None | State::FinishedOpts); |
| 1693 | assert_eq!(parser.source.as_slice().len(), 0); |
| 1694 | } |
| 1695 | _ => exhaust(parser, depth + 1), |
| 1696 | } |
| 1697 | } |
| 1698 | |
| 1699 | { |
| 1700 | let mut parser = parser.clone(); |
| 1701 | match parser.value() { |
| 1702 | Err(_) => { |
| 1703 | assert_matches!(parser.state, State::None | State::FinishedOpts); |
| 1704 | assert_eq!(parser.source.as_slice().len(), 0); |
| 1705 | } |
| 1706 | Ok(_) => { |
| 1707 | assert_matches!(parser.state, State::None | State::FinishedOpts); |
| 1708 | exhaust(parser, depth + 1); |
| 1709 | } |
| 1710 | } |
| 1711 | } |
| 1712 | |
| 1713 | { |
| 1714 | match parser.values() { |
| 1715 | Err(_) => (), |
| 1716 | Ok(iter) => { |
| 1717 | assert!(iter.count() > 0); |
| 1718 | exhaust(parser, depth + 1); |
| 1719 | } |
| 1720 | } |
| 1721 | } |
| 1722 | } |
| 1723 | } |
| 1724 | |