1// Copyright 2015 Nicholas Allegra (comex).
2// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
3// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
4// copied, modified, or distributed except according to those terms.
5
6//! Parse strings like, and escape strings for, POSIX shells.
7//!
8//! Same idea as (but implementation not directly based on) the Python shlex module.
9//!
10//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
11//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
12//!
13//! ## <span style="color:red">Warning</span>
14//!
15//! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they
16//! cannot be quoted portably).
17//!
18//! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or
19//! even scripts `source`d from interactive shells).
20//!
21//! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce
22//! ugly outputs (which may not be copy-pastable).
23//!
24//! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin
25//! of an interactive shell, you should stop, because control characters can lead to arbitrary
26//! command injection.
27//!
28//! For more information, and for information about more minor issues, please see [quoting_warning].
29//!
30//! ## Compatibility
31//!
32//! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**;
33//! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not
34//! POSIX-compatible but close enough).
35//!
36//! It also aims to be compatible with Python `shlex` and C `wordexp`.
37
38#![cfg_attr(not(feature = "std"), no_std)]
39
40extern crate alloc;
41use alloc::vec::Vec;
42use alloc::borrow::Cow;
43use alloc::string::String;
44#[cfg(test)]
45use alloc::vec;
46#[cfg(test)]
47use alloc::borrow::ToOwned;
48
49pub mod bytes;
50#[cfg(all(doc, not(doctest)))]
51#[path = "quoting_warning.md"]
52pub mod quoting_warning;
53
54/// An iterator that takes an input string and splits it into the words using the same syntax as
55/// the POSIX shell.
56///
57/// See [`bytes::Shlex`].
58pub struct Shlex<'a>(bytes::Shlex<'a>);
59
60impl<'a> Shlex<'a> {
61 pub fn new(in_str: &'a str) -> Self {
62 Self(bytes::Shlex::new(in_bytes:in_str.as_bytes()))
63 }
64}
65
66impl<'a> Iterator for Shlex<'a> {
67 type Item = String;
68 fn next(&mut self) -> Option<String> {
69 self.0.next().map(|byte_word: Vec| {
70 // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8.
71 unsafe { String::from_utf8_unchecked(bytes:byte_word) }
72 })
73 }
74}
75
76impl<'a> core::ops::Deref for Shlex<'a> {
77 type Target = bytes::Shlex<'a>;
78
79 fn deref(&self) -> &Self::Target {
80 &self.0
81 }
82}
83
84impl<'a> core::ops::DerefMut for Shlex<'a> {
85 fn deref_mut(&mut self) -> &mut Self::Target {
86 &mut self.0
87 }
88}
89
90/// Convenience function that consumes the whole string at once. Returns None if the input was
91/// erroneous.
92pub fn split(in_str: &str) -> Option<Vec<String>> {
93 let mut shl: Shlex<'_> = Shlex::new(in_str);
94 let res: Vec = shl.by_ref().collect();
95 if shl.had_error { None } else { Some(res) }
96}
97
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// By default, the only error that can be returned is [`QuoteError::Nul`].  If you call
/// `allow_nul(true)`, then no errors can be returned at all.  Any error variants added in the
/// future will not be enabled by default; they will be enabled through corresponding non-default
/// [`Quoter`] options.
///
/// ...In theory.  In the unlikely event that additional classes of inputs are discovered that,
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// The input contained a nul byte.  In most cases, shells fundamentally [cannot handle strings
    /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted.  But if
    /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
    /// them pass through.
    Nul,
}

impl core::fmt::Display for QuoteError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Deliberately no catch-all arm: adding a variant must force a message to be written.
        match self {
            QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
        }
    }
}

// `std::error::Error` only exists when building against `std`.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
129
130/// A more configurable interface to quote strings. If you only want the default settings you can
131/// use the convenience functions [`try_quote`] and [`try_join`].
132///
133/// The bytes equivalent is [`bytes::Quoter`].
134#[derive(Default, Debug, Clone)]
135pub struct Quoter {
136 inner: bytes::Quoter,
137}
138
139impl Quoter {
140 /// Create a new [`Quoter`] with default settings.
141 #[inline]
142 pub fn new() -> Self {
143 Self::default()
144 }
145
146 /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not
147 /// allowed and will result in an error of [`QuoteError::Nul`].
148 #[inline]
149 pub fn allow_nul(mut self, allow: bool) -> Self {
150 self.inner = self.inner.allow_nul(allow);
151 self
152 }
153
154 /// Convenience function that consumes an iterable of words and turns it into a single string,
155 /// quoting words when necessary. Consecutive words will be separated by a single space.
156 pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> {
157 // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8.
158 self.inner.join(words.into_iter().map(|s| s.as_bytes()))
159 .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) })
160 }
161
162 /// Given a single word, return a string suitable to encode it as a shell argument.
163 pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> {
164 Ok(match self.inner.quote(in_str.as_bytes())? {
165 Cow::Borrowed(out) => {
166 // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
167 unsafe { core::str::from_utf8_unchecked(out) }.into()
168 }
169 Cow::Owned(out) => {
170 // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
171 unsafe { String::from_utf8_unchecked(out) }.into()
172 }
173 })
174 }
175}
176
177impl From<bytes::Quoter> for Quoter {
178 fn from(inner: bytes::Quoter) -> Quoter {
179 Quoter { inner }
180 }
181}
182
183impl From<Quoter> for bytes::Quoter {
184 fn from(quoter: Quoter) -> bytes::Quoter {
185 quoter.inner
186 }
187}
188
189/// Convenience function that consumes an iterable of words and turns it into a single string,
190/// quoting words when necessary. Consecutive words will be separated by a single space.
191///
192/// Uses default settings except that nul bytes are passed through, which [may be
193/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
194///
195/// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
196///
197/// (That configuration never returns `Err`, so this function does not panic.)
198///
199/// The bytes equivalent is [bytes::join].
200#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
201pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
202 Quoter::new().allow_nul(allow:true).join(words).unwrap()
203}
204
205/// Convenience function that consumes an iterable of words and turns it into a single string,
206/// quoting words when necessary. Consecutive words will be separated by a single space.
207///
208/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
209///
210/// Equivalent to [`Quoter::new().join(words)`](Quoter).
211///
212/// The bytes equivalent is [bytes::try_join].
213pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> {
214 Quoter::new().join(words)
215}
216
217/// Given a single word, return a string suitable to encode it as a shell argument.
218///
219/// Uses default settings except that nul bytes are passed through, which [may be
220/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
221///
222/// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter).
223///
224/// (That configuration never returns `Err`, so this function does not panic.)
225///
226/// The bytes equivalent is [bytes::quote].
227#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
228pub fn quote(in_str: &str) -> Cow<str> {
229 Quoter::new().allow_nul(allow:true).quote(in_str).unwrap()
230}
231
232/// Given a single word, return a string suitable to encode it as a shell argument.
233///
234/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
235///
236/// Equivalent to [`Quoter::new().quote(in_str)`](Quoter).
237///
238/// (That configuration never returns `Err`, so this function does not panic.)
239///
240/// The bytes equivalent is [bytes::try_quote].
241pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> {
242 Quoter::new().quote(in_str)
243}
244
// Table driving `test_split`: each entry is `(input, expected)`, where `expected` is the list of
// words `split(input)` must produce, or `None` when `split` must reject the input.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
268
// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and compares against the expected words.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        // Include the input in the failure message so the offending table row is obvious.
        assert_eq!(split(input), expected, "input: {:?}", input);
    }
}
275
// Checks that `line_no` reads 3 once the word on the third input line has been produced.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh` must stay accessible inside the loop body so that
    // `line_no` can be read between calls to `next`.
    while let Some(word) = sh.next() {
        if word == "bar" {
            assert_eq!(sh.line_no, 3);
            saw_bar = true;
        }
    }
    // Without this, the test would pass vacuously if "bar" were never yielded.
    assert!(saw_bar, "expected the word \"bar\" to be produced");
}
285
#[test]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested.  (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    // Each row is `<unquoted> => <quoted>`; surrounding whitespace and the angle brackets are
    // stripped before comparison.
    let tests = r#"
        <> => <''>
        <foobar> => <foobar>
        <foo bar> => <'foo bar'>
        <"foo bar'"> => <"\"foo bar'\"">
        <'foo bar'> => <"'foo bar'">
        <"> => <'"'>
        <"'> => <"\"'">
        <hello!world> => <'hello!world'>
        <'hello!world> => <"'hello"'!world'>
        <'hello!> => <"'hello"'!'>
        <hello ^ world> => <'hello ''^ world'>
        <hello^> => <hello'^'>
        <!world'> => <'!world'"'">
        <{a, b}> => <'{a, b}'>
        <NL> => <'NL'>
        <^> => <'^'>
        <foo^bar> => <foo'^bar'>
        <NLx^> => <'NLx''^'>
        <NL^x> => <'NL''^x'>
        <NL ^x> => <'NL ''^x'>
        <{a,b}> => <'{a,b}'>
        <a,b> => <'a,b'>
        <a..b> => <a..b>
        <'$> => <"'"'$'>
        <"^> => <'"''^'>
    "#;
    // Collect every mismatch instead of stopping at the first one, so a single run reports all
    // broken rows.  `alloc::format!` is used so this works with or without the `std` feature.
    let mut failures: Vec<String> = Vec::new();
    for test in tests.trim().split('\n') {
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2, "malformed test row: {:?}", test);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            failures.push(alloc::format!(
                "FAIL: for input <{}>, expected <{}>, got <{}>",
                unquoted, quoted_expected, quoted_actual
            ));
        }
    }
    assert!(failures.is_empty(), "{}", failures.join("\n"));
}
344
// The deprecated `join` is exercised on purpose, hence the lint exemption.
#[test]
#[allow(deprecated)]
fn test_join() {
    // Each case is (words to join, expected output).
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
353
// With default settings, a nul byte must be rejected by both fallible entry points.
#[test]
fn test_fallible() {
    let joined = try_join(vec!["\0"]);
    assert_eq!(joined, Err(QuoteError::Nul));

    let quoted = try_quote("\0");
    assert_eq!(quoted, Err(QuoteError::Nul));
}
359