| 1 | // Copyright 2015 Nicholas Allegra (comex). |
| 2 | // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or |
| 3 | // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be |
| 4 | // copied, modified, or distributed except according to those terms. |
| 5 | |
| 6 | //! Parse strings like, and escape strings for, POSIX shells. |
| 7 | //! |
| 8 | //! Same idea as (but implementation not directly based on) the Python shlex module. |
| 9 | //! |
| 10 | //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in |
| 11 | //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. |
| 12 | //! |
| 13 | //! ## <span style="color:red">Warning</span> |
| 14 | //! |
| 15 | //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they |
| 16 | //! cannot be quoted portably). |
| 17 | //! |
| 18 | //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or |
| 19 | //! even scripts `source`d from interactive shells). |
| 20 | //! |
| 21 | //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce |
| 22 | //! ugly outputs (which may not be copy-pastable). |
| 23 | //! |
| 24 | //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin |
| 25 | //! of an interactive shell, you should stop, because control characters can lead to arbitrary |
| 26 | //! command injection. |
| 27 | //! |
| 28 | //! For more information, and for information about more minor issues, please see [quoting_warning]. |
| 29 | //! |
| 30 | //! ## Compatibility |
| 31 | //! |
| 32 | //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; |
| 33 | //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not |
| 34 | //! POSIX-compatible but close enough). |
| 35 | //! |
| 36 | //! It also aims to be compatible with Python `shlex` and C `wordexp`. |
| 37 | |
| 38 | #![cfg_attr (not(feature = "std" ), no_std)] |
| 39 | |
| 40 | extern crate alloc; |
| 41 | use alloc::vec::Vec; |
| 42 | use alloc::borrow::Cow; |
| 43 | use alloc::string::String; |
| 44 | #[cfg (test)] |
| 45 | use alloc::vec; |
| 46 | #[cfg (test)] |
| 47 | use alloc::borrow::ToOwned; |
| 48 | |
| 49 | pub mod bytes; |
| 50 | #[cfg (all(doc, not(doctest)))] |
| 51 | #[path = "quoting_warning.md" ] |
| 52 | pub mod quoting_warning; |
| 53 | |
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
///
/// This is a thin wrapper around [`bytes::Shlex`] that yields each word as a [`String`] rather
/// than as raw bytes. It dereferences to the wrapped [`bytes::Shlex`], so that type's public
/// fields (for example `had_error` and `line_no`) can be read directly through this wrapper.
///
/// See [`bytes::Shlex`].
pub struct Shlex<'a>(bytes::Shlex<'a>);
| 59 | |
| 60 | impl<'a> Shlex<'a> { |
| 61 | pub fn new(in_str: &'a str) -> Self { |
| 62 | Self(bytes::Shlex::new(in_bytes:in_str.as_bytes())) |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | impl<'a> Iterator for Shlex<'a> { |
| 67 | type Item = String; |
| 68 | fn next(&mut self) -> Option<String> { |
| 69 | self.0.next().map(|byte_word: Vec| { |
| 70 | // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. |
| 71 | unsafe { String::from_utf8_unchecked(bytes:byte_word) } |
| 72 | }) |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | impl<'a> core::ops::Deref for Shlex<'a> { |
| 77 | type Target = bytes::Shlex<'a>; |
| 78 | |
| 79 | fn deref(&self) -> &Self::Target { |
| 80 | &self.0 |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | impl<'a> core::ops::DerefMut for Shlex<'a> { |
| 85 | fn deref_mut(&mut self) -> &mut Self::Target { |
| 86 | &mut self.0 |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | /// Convenience function that consumes the whole string at once. Returns None if the input was |
| 91 | /// erroneous. |
| 92 | pub fn split(in_str: &str) -> Option<Vec<String>> { |
| 93 | let mut shl: Shlex<'_> = Shlex::new(in_str); |
| 94 | let res: Vec = shl.by_ref().collect(); |
| 95 | if shl.had_error { None } else { Some(res) } |
| 96 | } |
| 97 | |
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// By default, the only error that can be returned is [`QuoteError::Nul`].  If you call
/// `allow_nul(true)`, then no errors can be returned at all.  Any error variants added in the
/// future will not be enabled by default; they will be enabled through corresponding non-default
/// [`Quoter`] options.
///
/// ...In theory.  In the unlikely event that additional classes of inputs are discovered that,
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
///
/// The enum is `#[non_exhaustive]` so that such variants can be added later; code matching on
/// it from outside this crate needs a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// The input contained a nul byte.  In most cases, shells fundamentally [cannot handle strings
    /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted.  But if
    /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
    /// them pass through.
    Nul,
}
| 118 | |
| 119 | impl core::fmt::Display for QuoteError { |
| 120 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 121 | match self { |
| 122 | QuoteError::Nul => f.write_str(data:"cannot shell-quote string containing nul byte" ), |
| 123 | } |
| 124 | } |
| 125 | } |
| 126 | |
// This impl names `std::error::Error`, so it is only compiled when the `std` feature is
// enabled; without it the crate is `no_std` and `QuoteError` still has `Debug` and `Display`.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
| 129 | |
/// A more configurable interface to quote strings.  If you only want the default settings you can
/// use the convenience functions [`try_quote`] and [`try_join`].
///
/// Options are set with builder-style methods that take and return the quoter by value, e.g.
/// `Quoter::new().allow_nul(true)`.
///
/// The bytes equivalent is [`bytes::Quoter`].
#[derive(Default, Debug, Clone)]
pub struct Quoter {
    // Byte-oriented quoter holding the configuration; `allow_nul`, `join` and `quote` forward
    // to it.
    inner: bytes::Quoter,
}
| 138 | |
| 139 | impl Quoter { |
| 140 | /// Create a new [`Quoter`] with default settings. |
| 141 | #[inline ] |
| 142 | pub fn new() -> Self { |
| 143 | Self::default() |
| 144 | } |
| 145 | |
| 146 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not |
| 147 | /// allowed and will result in an error of [`QuoteError::Nul`]. |
| 148 | #[inline ] |
| 149 | pub fn allow_nul(mut self, allow: bool) -> Self { |
| 150 | self.inner = self.inner.allow_nul(allow); |
| 151 | self |
| 152 | } |
| 153 | |
| 154 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
| 155 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
| 156 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> { |
| 157 | // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. |
| 158 | self.inner.join(words.into_iter().map(|s| s.as_bytes())) |
| 159 | .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) |
| 160 | } |
| 161 | |
| 162 | /// Given a single word, return a string suitable to encode it as a shell argument. |
| 163 | pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> { |
| 164 | Ok(match self.inner.quote(in_str.as_bytes())? { |
| 165 | Cow::Borrowed(out) => { |
| 166 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
| 167 | unsafe { core::str::from_utf8_unchecked(out) }.into() |
| 168 | } |
| 169 | Cow::Owned(out) => { |
| 170 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
| 171 | unsafe { String::from_utf8_unchecked(out) }.into() |
| 172 | } |
| 173 | }) |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | impl From<bytes::Quoter> for Quoter { |
| 178 | fn from(inner: bytes::Quoter) -> Quoter { |
| 179 | Quoter { inner } |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | impl From<Quoter> for bytes::Quoter { |
| 184 | fn from(quoter: Quoter) -> bytes::Quoter { |
| 185 | quoter.inner |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
| 190 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
| 191 | /// |
| 192 | /// Uses default settings except that nul bytes are passed through, which [may be |
| 193 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
| 194 | /// |
| 195 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). |
| 196 | /// |
| 197 | /// (That configuration never returns `Err`, so this function does not panic.) |
| 198 | /// |
| 199 | /// The bytes equivalent is [bytes::join]. |
| 200 | #[deprecated (since = "1.3.0" , note = "replace with `try_join(words)?` to avoid nul byte danger" )] |
| 201 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { |
| 202 | Quoter::new().allow_nul(allow:true).join(words).unwrap() |
| 203 | } |
| 204 | |
| 205 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
| 206 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
| 207 | /// |
| 208 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
| 209 | /// |
| 210 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). |
| 211 | /// |
| 212 | /// The bytes equivalent is [bytes::try_join]. |
| 213 | pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> { |
| 214 | Quoter::new().join(words) |
| 215 | } |
| 216 | |
| 217 | /// Given a single word, return a string suitable to encode it as a shell argument. |
| 218 | /// |
| 219 | /// Uses default settings except that nul bytes are passed through, which [may be |
| 220 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
| 221 | /// |
| 222 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). |
| 223 | /// |
| 224 | /// (That configuration never returns `Err`, so this function does not panic.) |
| 225 | /// |
| 226 | /// The bytes equivalent is [bytes::quote]. |
| 227 | #[deprecated (since = "1.3.0" , note = "replace with `try_quote(str)?` to avoid nul byte danger" )] |
| 228 | pub fn quote(in_str: &str) -> Cow<str> { |
| 229 | Quoter::new().allow_nul(allow:true).quote(in_str).unwrap() |
| 230 | } |
| 231 | |
| 232 | /// Given a single word, return a string suitable to encode it as a shell argument. |
| 233 | /// |
| 234 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
| 235 | /// |
| 236 | /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). |
| 237 | /// |
| 238 | /// (That configuration never returns `Err`, so this function does not panic.) |
| 239 | /// |
| 240 | /// The bytes equivalent is [bytes::try_quote]. |
| 241 | pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> { |
| 242 | Quoter::new().quote(in_str) |
| 243 | } |
| 244 | |
// Table of `(input, expected)` pairs for `test_split`; `None` means the input must be rejected.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    // A quoted section glued to unquoted text stays part of the same word...
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    // ...while an unquoted space before it still separates words.
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    // Backslash-newline is a line continuation, both outside and inside double quotes.
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    // Backslashes are literal inside single quotes.
    ("'baz \\$b'", Some(&["baz \\$b"])),
    // Unterminated quotes and dangling backslashes are errors.
    ("'baz \\\''", None),
    (" \\", None),
    (" \"\\", None),
    ("' \\", None),
    (" \"", None),
    ("'", None),
    // `#` starts a comment only at the beginning of a word.
    ("foo #bar \nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo \"#bar", None),
    ("' \\n'", Some(&[" \\n"])),
    ("' \\\\n'", Some(&[" \\\\n"])),
];
| 268 | |
// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and compares against the expectation.
#[test]
fn test_split() {
    for (input, expected) in SPLIT_TEST_ITEMS.iter().copied() {
        // Turn the borrowed expectation into the owned shape `split` returns.
        let expected: Option<Vec<String>> =
            expected.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), expected);
    }
}
| 275 | |
// Checks that the line counter has reached 3 by the time the word on the third line is yielded.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new(" \nfoo \nbar");
    let mut saw_bar = false;
    // A `for` loop would keep `sh` borrowed, so step the iterator manually to be able to read
    // `line_no` between words.
    while let Some(word) = sh.next() {
        if word == "bar" {
            assert_eq!(sh.line_no, 3);
            saw_bar = true;
        }
    }
    // Without this the test would pass vacuously if "bar" were never produced.
    assert!(saw_bar, "expected the word \"bar\" to be yielded");
}
| 285 | |
// Without `std` the failure branch panics straight away, which makes the bookkeeping after it
// unreachable and `ok` never mutated; silence those lints for that configuration only.
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested.  (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    // Each line is `<unquoted> => <quoted>`; whitespace around either side is ignored.
    let tests = r#"
        <> => <''>
        <foobar> => <foobar>
        <foo bar> => <'foo bar'>
        <"foo bar'"> => <"\"foo bar'\"">
        <'foo bar'> => <"'foo bar'">
        <"> => <'"'>
        <"'> => <"\"'">
        <hello!world> => <'hello!world'>
        <'hello!world> => <"'hello"'!world'>
        <'hello!> => <"'hello"'!'>
        <hello ^ world> => <'hello ''^ world'>
        <hello^> => <hello'^'>
        <!world'> => <'!world'"'">
        <{a, b}> => <'{a, b}'>
        <NL> => <'NL'>
        <^> => <'^'>
        <foo^bar> => <foo'^bar'>
        <NLx^> => <'NLx''^'>
        <NL^x> => <'NL''^x'>
        <NL ^x> => <'NL ''^x'>
        <{a,b}> => <'{a,b}'>
        <a,b> => <'a,b'>
        <a..b> => <a..b>
        <'$> => <"'"'$'>
        <"^> => <'"''^'>
    "#;
    let mut ok = true;
    for test in tests.trim().split('\n') {
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            // `println!` needs `std`; without it, fail on the first mismatch instead of
            // collecting all of them.
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                   unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
| 344 | |
// Exercises the deprecated infallible `join` on a few word lists.
#[test]
#[allow(deprecated)]
fn test_join() {
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
| 353 | |
// The default-configured entry points must reject input that contains a nul byte.
#[test]
fn test_fallible() {
    let with_nul = " \0";
    assert_eq!(try_quote(with_nul), Err(QuoteError::Nul));
    assert_eq!(try_join(vec![with_nul]), Err(QuoteError::Nul));
}
| 359 | |