| 1 | // Copyright 2015 Nicholas Allegra (comex). | 
| 2 | // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or | 
|---|
| 3 | // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be | 
|---|
| 4 | // copied, modified, or distributed except according to those terms. | 
|---|
| 5 |  | 
|---|
| 6 | //! Parse strings like, and escape strings for, POSIX shells. | 
|---|
| 7 | //! | 
|---|
| 8 | //! Same idea as (but implementation not directly based on) the Python shlex module. | 
|---|
| 9 | //! | 
|---|
| 10 | //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in | 
|---|
| 11 | //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. | 
|---|
| 12 | //! | 
|---|
| 13 | //! ## <span style="color:red">Warning</span> | 
|---|
| 14 | //! | 
|---|
| 15 | //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they | 
|---|
| 16 | //! cannot be quoted portably). | 
|---|
| 17 | //! | 
|---|
| 18 | //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or | 
|---|
| 19 | //! even scripts `source`d from interactive shells). | 
|---|
| 20 | //! | 
|---|
| 21 | //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce | 
|---|
| 22 | //! ugly outputs (which may not be copy-pastable). | 
|---|
| 23 | //! | 
|---|
| 24 | //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin | 
|---|
| 25 | //! of an interactive shell, you should stop, because control characters can lead to arbitrary | 
|---|
| 26 | //! command injection. | 
|---|
| 27 | //! | 
|---|
| 28 | //! For more information, and for information about more minor issues, please see [quoting_warning]. | 
|---|
| 29 | //! | 
|---|
| 30 | //! ## Compatibility | 
|---|
| 31 | //! | 
|---|
| 32 | //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; | 
|---|
| 33 | //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not | 
|---|
| 34 | //! POSIX-compatible but close enough). | 
|---|
| 35 | //! | 
|---|
| 36 | //! It also aims to be compatible with Python `shlex` and C `wordexp`. | 
|---|
| 37 |  | 
|---|
| 38 | #![ cfg_attr(not(feature = "std"), no_std)] | 
|---|
| 39 |  | 
|---|
| 40 | extern crate alloc; | 
|---|
| 41 | use alloc::vec::Vec; | 
|---|
| 42 | use alloc::borrow::Cow; | 
|---|
| 43 | use alloc::string::String; | 
|---|
| 44 | #[ cfg(test)] | 
|---|
| 45 | use alloc::vec; | 
|---|
| 46 | #[ cfg(test)] | 
|---|
| 47 | use alloc::borrow::ToOwned; | 
|---|
| 48 |  | 
|---|
| 49 | pub mod bytes; | 
|---|
| 50 | #[ cfg(all(doc, not(doctest)))] | 
|---|
| 51 | #[ path= "quoting_warning.md"] | 
|---|
| 52 | pub mod quoting_warning; | 
|---|
| 53 |  | 
|---|
| 54 | /// An iterator that takes an input string and splits it into the words using the same syntax as | 
|---|
| 55 | /// the POSIX shell. | 
|---|
| 56 | /// | 
|---|
| 57 | /// See [`bytes::Shlex`]. | 
|---|
| 58 | pub struct Shlex<'a>(bytes::Shlex<'a>); | 
|---|
| 59 |  | 
|---|
| 60 | impl<'a> Shlex<'a> { | 
|---|
| 61 | pub fn new(in_str: &'a str) -> Self { | 
|---|
| 62 | Self(bytes::Shlex::new(in_bytes:in_str.as_bytes())) | 
|---|
| 63 | } | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | impl<'a> Iterator for Shlex<'a> { | 
|---|
| 67 | type Item = String; | 
|---|
| 68 | fn next(&mut self) -> Option<String> { | 
|---|
| 69 | self.0.next().map(|byte_word: Vec| { | 
|---|
| 70 | // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. | 
|---|
| 71 | unsafe { String::from_utf8_unchecked(bytes:byte_word) } | 
|---|
| 72 | }) | 
|---|
| 73 | } | 
|---|
| 74 | } | 
|---|
| 75 |  | 
|---|
| 76 | impl<'a> core::ops::Deref for Shlex<'a> { | 
|---|
| 77 | type Target = bytes::Shlex<'a>; | 
|---|
| 78 |  | 
|---|
| 79 | fn deref(&self) -> &Self::Target { | 
|---|
| 80 | &self.0 | 
|---|
| 81 | } | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | impl<'a> core::ops::DerefMut for Shlex<'a> { | 
|---|
| 85 | fn deref_mut(&mut self) -> &mut Self::Target { | 
|---|
| 86 | &mut self.0 | 
|---|
| 87 | } | 
|---|
| 88 | } | 
|---|
| 89 |  | 
|---|
| 90 | /// Convenience function that consumes the whole string at once.  Returns None if the input was | 
|---|
| 91 | /// erroneous. | 
|---|
| 92 | pub fn split(in_str: &str) -> Option<Vec<String>> { | 
|---|
| 93 | let mut shl: Shlex<'_> = Shlex::new(in_str); | 
|---|
| 94 | let res: Vec = shl.by_ref().collect(); | 
|---|
| 95 | if shl.had_error { None } else { Some(res) } | 
|---|
| 96 | } | 
|---|
| 97 |  | 
|---|
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// By default, the only error that can be returned is [`QuoteError::Nul`].  If you call
/// `allow_nul(true)`, then no errors can be returned at all.  Any error variants added in the
/// future will not be enabled by default; they will be enabled through corresponding non-default
/// [`Quoter`] options.
///
/// ...In theory.  In the unlikely event that additional classes of inputs are discovered that,
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// The input contained a nul byte.  In most cases, shells fundamentally [cannot handle strings
    /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted.  But if
    /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
    /// them pass through.
    Nul,
}

/// Human-readable description of each error variant.
impl core::fmt::Display for QuoteError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a message to be written for it.
        match self {
            QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
        }
    }
}

// `std::error::Error` only exists when `std` is available, hence the feature gate.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
|---|
| 129 |  | 
|---|
| 130 | /// A more configurable interface to quote strings.  If you only want the default settings you can | 
|---|
| 131 | /// use the convenience functions [`try_quote`] and [`try_join`]. | 
|---|
| 132 | /// | 
|---|
| 133 | /// The bytes equivalent is [`bytes::Quoter`]. | 
|---|
| 134 | #[ derive(Default, Debug, Clone)] | 
|---|
| 135 | pub struct Quoter { | 
|---|
| 136 | inner: bytes::Quoter, | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | impl Quoter { | 
|---|
| 140 | /// Create a new [`Quoter`] with default settings. | 
|---|
| 141 | #[ inline] | 
|---|
| 142 | pub fn new() -> Self { | 
|---|
| 143 | Self::default() | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes).  By default they are not | 
|---|
| 147 | /// allowed and will result in an error of [`QuoteError::Nul`]. | 
|---|
| 148 | #[ inline] | 
|---|
| 149 | pub fn allow_nul(mut self, allow: bool) -> Self { | 
|---|
| 150 | self.inner = self.inner.allow_nul(allow); | 
|---|
| 151 | self | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | /// Convenience function that consumes an iterable of words and turns it into a single string, | 
|---|
| 155 | /// quoting words when necessary. Consecutive words will be separated by a single space. | 
|---|
| 156 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> { | 
|---|
| 157 | // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. | 
|---|
| 158 | self.inner.join(words.into_iter().map(|s| s.as_bytes())) | 
|---|
| 159 | .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | /// Given a single word, return a string suitable to encode it as a shell argument. | 
|---|
| 163 | pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> { | 
|---|
| 164 | Ok(match self.inner.quote(in_str.as_bytes())? { | 
|---|
| 165 | Cow::Borrowed(out) => { | 
|---|
| 166 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. | 
|---|
| 167 | unsafe { core::str::from_utf8_unchecked(out) }.into() | 
|---|
| 168 | } | 
|---|
| 169 | Cow::Owned(out) => { | 
|---|
| 170 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. | 
|---|
| 171 | unsafe { String::from_utf8_unchecked(out) }.into() | 
|---|
| 172 | } | 
|---|
| 173 | }) | 
|---|
| 174 | } | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | impl From<bytes::Quoter> for Quoter { | 
|---|
| 178 | fn from(inner: bytes::Quoter) -> Quoter { | 
|---|
| 179 | Quoter { inner } | 
|---|
| 180 | } | 
|---|
| 181 | } | 
|---|
| 182 |  | 
|---|
| 183 | impl From<Quoter> for bytes::Quoter { | 
|---|
| 184 | fn from(quoter: Quoter) -> bytes::Quoter { | 
|---|
| 185 | quoter.inner | 
|---|
| 186 | } | 
|---|
| 187 | } | 
|---|
| 188 |  | 
|---|
| 189 | /// Convenience function that consumes an iterable of words and turns it into a single string, | 
|---|
| 190 | /// quoting words when necessary. Consecutive words will be separated by a single space. | 
|---|
| 191 | /// | 
|---|
| 192 | /// Uses default settings except that nul bytes are passed through, which [may be | 
|---|
| 193 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. | 
|---|
| 194 | /// | 
|---|
| 195 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). | 
|---|
| 196 | /// | 
|---|
| 197 | /// (That configuration never returns `Err`, so this function does not panic.) | 
|---|
| 198 | /// | 
|---|
| 199 | /// The bytes equivalent is [bytes::join]. | 
|---|
| 200 | #[ deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")] | 
|---|
| 201 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { | 
|---|
| 202 | Quoter::new().allow_nul(allow:true).join(words).unwrap() | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | /// Convenience function that consumes an iterable of words and turns it into a single string, | 
|---|
| 206 | /// quoting words when necessary. Consecutive words will be separated by a single space. | 
|---|
| 207 | /// | 
|---|
| 208 | /// Uses default settings.  The only error that can be returned is [`QuoteError::Nul`]. | 
|---|
| 209 | /// | 
|---|
| 210 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). | 
|---|
| 211 | /// | 
|---|
| 212 | /// The bytes equivalent is [bytes::try_join]. | 
|---|
| 213 | pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> { | 
|---|
| 214 | Quoter::new().join(words) | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | /// Given a single word, return a string suitable to encode it as a shell argument. | 
|---|
| 218 | /// | 
|---|
| 219 | /// Uses default settings except that nul bytes are passed through, which [may be | 
|---|
| 220 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. | 
|---|
| 221 | /// | 
|---|
| 222 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). | 
|---|
| 223 | /// | 
|---|
| 224 | /// (That configuration never returns `Err`, so this function does not panic.) | 
|---|
| 225 | /// | 
|---|
| 226 | /// The bytes equivalent is [bytes::quote]. | 
|---|
| 227 | #[ deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")] | 
|---|
| 228 | pub fn quote(in_str: &str) -> Cow<str> { | 
|---|
| 229 | Quoter::new().allow_nul(allow:true).quote(in_str).unwrap() | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | /// Given a single word, return a string suitable to encode it as a shell argument. | 
|---|
| 233 | /// | 
|---|
| 234 | /// Uses default settings.  The only error that can be returned is [`QuoteError::Nul`]. | 
|---|
| 235 | /// | 
|---|
| 236 | /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). | 
|---|
| 237 | /// | 
|---|
| 238 | /// (That configuration never returns `Err`, so this function does not panic.) | 
|---|
| 239 | /// | 
|---|
| 240 | /// The bytes equivalent is [bytes::try_quote]. | 
|---|
| 241 | pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> { | 
|---|
| 242 | Quoter::new().quote(in_str) | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|
// Table of `(input, expected)` pairs for `split`: `Some(words)` when the input is well-formed,
// `None` when splitting must report an error (e.g. an unterminated quote or a trailing backslash).
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
|---|
| 268 |  | 
|---|
// Runs every entry of `SPLIT_TEST_ITEMS` through `split` and compares against the expectation.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        // The table stores borrowed words; `split` returns owned ones, so convert before comparing.
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), expected);
    }
}
|---|
| 275 |  | 
|---|
// Checks that `line_no` reports line 3 once the word on the third input line has been read.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh` must stay accessible inside the loop to read `line_no`.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this the test would pass vacuously if `bar` were never yielded.
    assert!(saw_bar, "expected the word `bar` to be yielded");
}
|---|
| 285 |  | 
|---|
// Without `std` the failure path panics immediately, which makes the code after it unreachable
// and `ok` never mutated; silence those lints for that configuration only.
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested.  (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    let tests = r#"
        <>                => <''>
        <foobar>          => <foobar>
        <foo bar>         => <'foo bar'>
        <"foo bar'">      => <"\"foo bar'\"">
        <'foo bar'>       => <"'foo bar'">
        <">               => <'"'>
        <"'>              => <"\"'">
        <hello!world>     => <'hello!world'>
        <'hello!world>    => <"'hello"'!world'>
        <'hello!>         => <"'hello"'!'>
        <hello ^ world>   => <'hello ''^ world'>
        <hello^>          => <hello'^'>
        <!world'>         => <'!world'"'">
        <{a, b}>          => <'{a, b}'>
        <NL>              => <'NL'>
        <^>               => <'^'>
        <foo^bar>         => <foo'^bar'>
        <NLx^>            => <'NLx''^'>
        <NL^x>            => <'NL''^x'>
        <NL ^x>           => <'NL ''^x'>
        <{a,b}>           => <'{a,b}'>
        <a,b>             => <'a,b'>
        <a..b>            => <a..b>
        <'$>              => <"'"'$'>
        <"^>              => <'"''^'>
    "#;
    let mut ok = true;
    for test in tests.trim().split('\n') {
        // Each row is `<unquoted> => <quoted>`; strip the padding and the angle-bracket delimiters.
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert!(parts.len() == 2);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            // With `std`, report every mismatch before failing; without it, fail on the first.
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                   unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
|---|
| 344 |  | 
|---|
// Exercises the deprecated `join` on purpose, hence the lint override.
#[test]
#[allow(deprecated)]
fn test_join() {
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
|---|
| 353 |  | 
|---|
// With default settings, a nul byte must be rejected by both fallible entry points.
#[test]
fn test_fallible() {
    assert_eq!(try_join(vec!["\0"]), Err(QuoteError::Nul));
    assert_eq!(try_quote("\0"), Err(QuoteError::Nul));
}
|---|
| 359 |  | 
|---|