1 | //! Minimal, flexible command-line parser |
2 | //! |
3 | //! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing |
4 | //! a command-line is not context-free, we rely on the caller to decide how to interpret the |
5 | //! arguments. |
6 | //! |
7 | //! # Examples |
8 | //! |
9 | //! ```rust |
10 | //! use std::path::PathBuf; |
11 | //! use std::ffi::OsStr; |
12 | //! |
13 | //! type BoxedError = Box<dyn std::error::Error + Send + Sync>; |
14 | //! |
15 | //! #[derive(Debug)] |
16 | //! struct Args { |
17 | //! paths: Vec<PathBuf>, |
18 | //! color: Color, |
19 | //! verbosity: usize, |
20 | //! } |
21 | //! |
22 | //! #[derive(Debug)] |
23 | //! enum Color { |
24 | //! Always, |
25 | //! Auto, |
26 | //! Never, |
27 | //! } |
28 | //! |
29 | //! impl Color { |
30 | //! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> { |
31 | //! let s = s.map(|s| s.to_str().ok_or(s)); |
32 | //! match s { |
33 | //! Some(Ok("always" )) | Some(Ok("" )) | None => { |
34 | //! Ok(Color::Always) |
35 | //! } |
36 | //! Some(Ok("auto" )) => { |
37 | //! Ok(Color::Auto) |
38 | //! } |
39 | //! Some(Ok("never" )) => { |
40 | //! Ok(Color::Never) |
41 | //! } |
42 | //! Some(invalid) => { |
43 | //! Err(format!("Invalid value for `--color`, {invalid:?}" ).into()) |
44 | //! } |
45 | //! } |
46 | //! } |
47 | //! } |
48 | //! |
49 | //! fn parse_args( |
50 | //! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>> |
51 | //! ) -> Result<Args, BoxedError> { |
52 | //! let mut args = Args { |
53 | //! paths: Vec::new(), |
54 | //! color: Color::Auto, |
55 | //! verbosity: 0, |
56 | //! }; |
57 | //! |
58 | //! let raw = clap_lex::RawArgs::new(raw); |
59 | //! let mut cursor = raw.cursor(); |
60 | //! raw.next(&mut cursor); // Skip the bin |
61 | //! while let Some(arg) = raw.next(&mut cursor) { |
62 | //! if arg.is_escape() { |
63 | //! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from)); |
64 | //! } else if arg.is_stdio() { |
65 | //! args.paths.push(PathBuf::from("-" )); |
66 | //! } else if let Some((long, value)) = arg.to_long() { |
67 | //! match long { |
68 | //! Ok("verbose" ) => { |
69 | //! if let Some(value) = value { |
70 | //! return Err(format!("`--verbose` does not take a value, got `{value:?}`" ).into()); |
71 | //! } |
72 | //! args.verbosity += 1; |
73 | //! } |
74 | //! Ok("color" ) => { |
75 | //! args.color = Color::parse(value)?; |
76 | //! } |
77 | //! _ => { |
78 | //! return Err( |
//! format!("Unexpected flag: {}", arg.display()).into()
80 | //! ); |
81 | //! } |
82 | //! } |
83 | //! } else if let Some(mut shorts) = arg.to_short() { |
84 | //! while let Some(short) = shorts.next_flag() { |
85 | //! match short { |
86 | //! Ok('v' ) => { |
87 | //! args.verbosity += 1; |
88 | //! } |
89 | //! Ok('c' ) => { |
90 | //! let value = shorts.next_value_os(); |
91 | //! args.color = Color::parse(value)?; |
92 | //! } |
93 | //! Ok(c) => { |
94 | //! return Err(format!("Unexpected flag: -{c}" ).into()); |
95 | //! } |
96 | //! Err(e) => { |
97 | //! return Err(format!("Unexpected flag: -{}" , e.to_string_lossy()).into()); |
98 | //! } |
99 | //! } |
100 | //! } |
101 | //! } else { |
102 | //! args.paths.push(PathBuf::from(arg.to_value_os().to_owned())); |
103 | //! } |
104 | //! } |
105 | //! |
106 | //! Ok(args) |
107 | //! } |
108 | //! |
109 | //! let args = parse_args(["bin" , "--hello" , "world" ]); |
110 | //! println!("{args:?}" ); |
111 | //! ``` |
112 | |
113 | mod ext; |
114 | |
115 | use std::ffi::OsStr; |
116 | use std::ffi::OsString; |
117 | |
118 | pub use std::io::SeekFrom; |
119 | |
120 | pub use ext::OsStrExt; |
121 | |
/// Owned list of raw command-line arguments
///
/// The list itself stores no position; it is walked with an [`ArgCursor`] obtained from
/// [`RawArgs::cursor`].
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct RawArgs {
    /// Arguments, in the order they were supplied
    items: Vec<OsString>,
}
127 | |
128 | impl RawArgs { |
129 | //// Create an argument list to parse |
130 | /// |
131 | /// **NOTE:** The argument returned will be the current binary. |
132 | /// |
133 | /// # Example |
134 | /// |
135 | /// ```rust,no_run |
136 | /// # use std::path::PathBuf; |
137 | /// let raw = clap_lex::RawArgs::from_args(); |
138 | /// let mut cursor = raw.cursor(); |
139 | /// let _bin = raw.next_os(&mut cursor); |
140 | /// |
141 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
142 | /// println!("{paths:?}" ); |
143 | /// ``` |
144 | pub fn from_args() -> Self { |
145 | Self::new(std::env::args_os()) |
146 | } |
147 | |
148 | //// Create an argument list to parse |
149 | /// |
150 | /// # Example |
151 | /// |
152 | /// ```rust,no_run |
153 | /// # use std::path::PathBuf; |
154 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
155 | /// let mut cursor = raw.cursor(); |
156 | /// let _bin = raw.next_os(&mut cursor); |
157 | /// |
158 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
159 | /// println!("{paths:?}" ); |
160 | /// ``` |
161 | pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self { |
162 | let iter = iter.into_iter(); |
163 | Self::from(iter) |
164 | } |
165 | |
166 | /// Create a cursor for walking the arguments |
167 | /// |
168 | /// # Example |
169 | /// |
170 | /// ```rust,no_run |
171 | /// # use std::path::PathBuf; |
172 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
173 | /// let mut cursor = raw.cursor(); |
174 | /// let _bin = raw.next_os(&mut cursor); |
175 | /// |
176 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
177 | /// println!("{paths:?}" ); |
178 | /// ``` |
179 | pub fn cursor(&self) -> ArgCursor { |
180 | ArgCursor::new() |
181 | } |
182 | |
183 | /// Advance the cursor, returning the next [`ParsedArg`] |
184 | pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> { |
185 | self.next_os(cursor).map(ParsedArg::new) |
186 | } |
187 | |
188 | /// Advance the cursor, returning a raw argument value. |
189 | pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> { |
190 | let next = self.items.get(cursor.cursor).map(|s| s.as_os_str()); |
191 | cursor.cursor = cursor.cursor.saturating_add(1); |
192 | next |
193 | } |
194 | |
195 | /// Return the next [`ParsedArg`] |
196 | pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> { |
197 | self.peek_os(cursor).map(ParsedArg::new) |
198 | } |
199 | |
200 | /// Return a raw argument value. |
201 | pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> { |
202 | self.items.get(cursor.cursor).map(|s| s.as_os_str()) |
203 | } |
204 | |
205 | /// Return all remaining raw arguments, advancing the cursor to the end |
206 | /// |
207 | /// # Example |
208 | /// |
209 | /// ```rust,no_run |
210 | /// # use std::path::PathBuf; |
211 | /// let raw = clap_lex::RawArgs::new(["bin" , "foo.txt" ]); |
212 | /// let mut cursor = raw.cursor(); |
213 | /// let _bin = raw.next_os(&mut cursor); |
214 | /// |
215 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); |
216 | /// println!("{paths:?}" ); |
217 | /// ``` |
218 | pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> { |
219 | let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str()); |
220 | cursor.cursor = self.items.len(); |
221 | remaining |
222 | } |
223 | |
224 | /// Adjust the cursor's position |
225 | pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) { |
226 | let pos = match pos { |
227 | SeekFrom::Start(pos) => pos, |
228 | SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64, |
229 | SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64, |
230 | }; |
231 | let pos = (pos as usize).min(self.items.len()); |
232 | cursor.cursor = pos; |
233 | } |
234 | |
235 | /// Inject arguments before the [`RawArgs::next`] |
236 | pub fn insert( |
237 | &mut self, |
238 | cursor: &ArgCursor, |
239 | insert_items: impl IntoIterator<Item = impl Into<OsString>>, |
240 | ) { |
241 | self.items.splice( |
242 | cursor.cursor..cursor.cursor, |
243 | insert_items.into_iter().map(Into::into), |
244 | ); |
245 | } |
246 | |
247 | /// Any remaining args? |
248 | pub fn is_end(&self, cursor: &ArgCursor) -> bool { |
249 | self.peek_os(cursor).is_none() |
250 | } |
251 | } |
252 | |
253 | impl<I, T> From<I> for RawArgs |
254 | where |
255 | I: Iterator<Item = T>, |
256 | T: Into<OsString>, |
257 | { |
258 | fn from(val: I) -> Self { |
259 | Self { |
260 | items: val.map(|x: T| x.into()).collect(), |
261 | } |
262 | } |
263 | } |
264 | |
/// Position within [`RawArgs`]
///
/// Cursors are ordered by their position.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    /// Index of the argument that will be read next
    cursor: usize,
}

impl ArgCursor {
    /// Cursor pointing at the first argument
    fn new() -> Self {
        let cursor = 0;
        Self { cursor }
    }
}
276 | |
/// A single command-line argument
///
/// This only borrows the argument text; whether it is a flag, an escape or a plain value is
/// decided by the method the caller chooses to interpret it with.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParsedArg<'s> {
    /// The argument exactly as supplied
    inner: &'s OsStr,
}
282 | |
283 | impl<'s> ParsedArg<'s> { |
284 | fn new(inner: &'s OsStr) -> Self { |
285 | Self { inner } |
286 | } |
287 | |
288 | /// Argument is length of 0 |
289 | pub fn is_empty(&self) -> bool { |
290 | self.inner.is_empty() |
291 | } |
292 | |
293 | /// Does the argument look like a stdio argument (`-`) |
294 | pub fn is_stdio(&self) -> bool { |
295 | self.inner == "-" |
296 | } |
297 | |
298 | /// Does the argument look like an argument escape (`--`) |
299 | pub fn is_escape(&self) -> bool { |
300 | self.inner == "--" |
301 | } |
302 | |
303 | /// Does the argument look like a number |
304 | pub fn is_number(&self) -> bool { |
305 | self.to_value() |
306 | .map(|s| s.parse::<f64>().is_ok()) |
307 | .unwrap_or_default() |
308 | } |
309 | |
310 | /// Treat as a long-flag |
311 | pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> { |
312 | let raw = self.inner; |
313 | let remainder = raw.strip_prefix("--" )?; |
314 | if remainder.is_empty() { |
315 | debug_assert!(self.is_escape()); |
316 | return None; |
317 | } |
318 | |
319 | let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=" ) { |
320 | (p0, Some(p1)) |
321 | } else { |
322 | (remainder, None) |
323 | }; |
324 | let flag = flag.to_str().ok_or(flag); |
325 | Some((flag, value)) |
326 | } |
327 | |
328 | /// Can treat as a long-flag |
329 | pub fn is_long(&self) -> bool { |
330 | self.inner.starts_with("--" ) && !self.is_escape() |
331 | } |
332 | |
333 | /// Treat as a short-flag |
334 | pub fn to_short(&self) -> Option<ShortFlags<'_>> { |
335 | if let Some(remainder_os) = self.inner.strip_prefix("-" ) { |
336 | if remainder_os.starts_with("-" ) { |
337 | None |
338 | } else if remainder_os.is_empty() { |
339 | debug_assert!(self.is_stdio()); |
340 | None |
341 | } else { |
342 | Some(ShortFlags::new(remainder_os)) |
343 | } |
344 | } else { |
345 | None |
346 | } |
347 | } |
348 | |
349 | /// Can treat as a short-flag |
350 | pub fn is_short(&self) -> bool { |
351 | self.inner.starts_with("-" ) && !self.is_stdio() && !self.inner.starts_with("--" ) |
352 | } |
353 | |
354 | /// Treat as a value |
355 | /// |
356 | /// **NOTE:** May return a flag or an escape. |
357 | pub fn to_value_os(&self) -> &OsStr { |
358 | self.inner |
359 | } |
360 | |
361 | /// Treat as a value |
362 | /// |
363 | /// **NOTE:** May return a flag or an escape. |
364 | pub fn to_value(&self) -> Result<&str, &OsStr> { |
365 | self.inner.to_str().ok_or(self.inner) |
366 | } |
367 | |
368 | /// Safely print an argument that may contain non-UTF8 content |
369 | /// |
370 | /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead. |
371 | pub fn display(&self) -> impl std::fmt::Display + '_ { |
372 | self.inner.to_string_lossy() |
373 | } |
374 | } |
375 | |
/// Walk through short flags within a [`ParsedArg`]
///
/// Flags are handed out one `char` at a time; a trailing part that is not valid UTF-8 is
/// handed out as a single raw value.
#[derive(Debug, Clone)]
pub struct ShortFlags<'s> {
    /// The argument without its leading `-`
    inner: &'s OsStr,
    /// Flags from the valid UTF-8 start of `inner` that have not been consumed yet
    utf8_prefix: std::str::CharIndices<'s>,
    /// The part of `inner` that is not valid UTF-8, until it has been handed out
    invalid_suffix: Option<&'s OsStr>,
}
383 | |
384 | impl<'s> ShortFlags<'s> { |
385 | fn new(inner: &'s OsStr) -> Self { |
386 | let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner); |
387 | let utf8_prefix = utf8_prefix.char_indices(); |
388 | Self { |
389 | inner, |
390 | utf8_prefix, |
391 | invalid_suffix, |
392 | } |
393 | } |
394 | |
395 | /// Move the iterator forward by `n` short flags |
396 | pub fn advance_by(&mut self, n: usize) -> Result<(), usize> { |
397 | for i in 0..n { |
398 | self.next().ok_or(i)?.map_err(|_| i)?; |
399 | } |
400 | Ok(()) |
401 | } |
402 | |
403 | /// No short flags left |
404 | pub fn is_empty(&self) -> bool { |
405 | self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty() |
406 | } |
407 | |
408 | /// Does the short flag look like a number |
409 | /// |
410 | /// Ideally call this before doing any iterator |
411 | pub fn is_number(&self) -> bool { |
412 | self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok() |
413 | } |
414 | |
415 | /// Advance the iterator, returning the next short flag on success |
416 | /// |
417 | /// On error, returns the invalid-UTF8 value |
418 | pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> { |
419 | if let Some((_, flag)) = self.utf8_prefix.next() { |
420 | return Some(Ok(flag)); |
421 | } |
422 | |
423 | if let Some(suffix) = self.invalid_suffix { |
424 | self.invalid_suffix = None; |
425 | return Some(Err(suffix)); |
426 | } |
427 | |
428 | None |
429 | } |
430 | |
431 | /// Advance the iterator, returning everything left as a value |
432 | pub fn next_value_os(&mut self) -> Option<&'s OsStr> { |
433 | if let Some((index, _)) = self.utf8_prefix.next() { |
434 | self.utf8_prefix = "" .char_indices(); |
435 | self.invalid_suffix = None; |
436 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
437 | let remainder = unsafe { ext::split_at(self.inner, index).1 }; |
438 | return Some(remainder); |
439 | } |
440 | |
441 | if let Some(suffix) = self.invalid_suffix { |
442 | self.invalid_suffix = None; |
443 | return Some(suffix); |
444 | } |
445 | |
446 | None |
447 | } |
448 | } |
449 | |
450 | impl<'s> Iterator for ShortFlags<'s> { |
451 | type Item = Result<char, &'s OsStr>; |
452 | |
453 | fn next(&mut self) -> Option<Self::Item> { |
454 | self.next_flag() |
455 | } |
456 | } |
457 | |
458 | fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) { |
459 | match b.try_str() { |
460 | Ok(s: &str) => (s, None), |
461 | Err(err: Utf8Error) => { |
462 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
463 | let (valid: &OsStr, after_valid: &OsStr) = unsafe { ext::split_at(os:b, index:err.valid_up_to()) }; |
464 | let valid: &str = valid.try_str().unwrap(); |
465 | (valid, Some(after_valid)) |
466 | } |
467 | } |
468 | } |
469 | |