1 | // Copyright 2015 Nicholas Allegra (comex). |
2 | // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or |
3 | // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be |
4 | // copied, modified, or distributed except according to those terms. |
5 | |
6 | //! Parse strings like, and escape strings for, POSIX shells. |
7 | //! |
8 | //! Same idea as (but implementation not directly based on) the Python shlex module. |
9 | //! |
10 | //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in |
11 | //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. |
12 | //! |
13 | //! ## <span style="color:red">Warning</span> |
14 | //! |
15 | //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they |
16 | //! cannot be quoted portably). |
17 | //! |
18 | //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or |
19 | //! even scripts `source`d from interactive shells). |
20 | //! |
21 | //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce |
22 | //! ugly outputs (which may not be copy-pastable). |
23 | //! |
24 | //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin |
25 | //! of an interactive shell, you should stop, because control characters can lead to arbitrary |
26 | //! command injection. |
27 | //! |
28 | //! For more information, and for information about more minor issues, please see [quoting_warning]. |
29 | //! |
30 | //! ## Compatibility |
31 | //! |
32 | //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; |
33 | //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not |
34 | //! POSIX-compatible but close enough). |
35 | //! |
36 | //! It also aims to be compatible with Python `shlex` and C `wordexp`. |
37 | |
38 | #![cfg_attr (not(feature = "std" ), no_std)] |
39 | |
40 | extern crate alloc; |
41 | use alloc::vec::Vec; |
42 | use alloc::borrow::Cow; |
43 | use alloc::string::String; |
44 | #[cfg (test)] |
45 | use alloc::vec; |
46 | #[cfg (test)] |
47 | use alloc::borrow::ToOwned; |
48 | |
49 | pub mod bytes; |
50 | #[cfg (all(doc, not(doctest)))] |
51 | #[path = "quoting_warning.md" ] |
52 | pub mod quoting_warning; |
53 | |
54 | /// An iterator that takes an input string and splits it into the words using the same syntax as |
55 | /// the POSIX shell. |
56 | /// |
57 | /// See [`bytes::Shlex`]. |
58 | pub struct Shlex<'a>(bytes::Shlex<'a>); |
59 | |
60 | impl<'a> Shlex<'a> { |
61 | pub fn new(in_str: &'a str) -> Self { |
62 | Self(bytes::Shlex::new(in_bytes:in_str.as_bytes())) |
63 | } |
64 | } |
65 | |
66 | impl<'a> Iterator for Shlex<'a> { |
67 | type Item = String; |
68 | fn next(&mut self) -> Option<String> { |
69 | self.0.next().map(|byte_word: Vec| { |
70 | // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. |
71 | unsafe { String::from_utf8_unchecked(bytes:byte_word) } |
72 | }) |
73 | } |
74 | } |
75 | |
76 | impl<'a> core::ops::Deref for Shlex<'a> { |
77 | type Target = bytes::Shlex<'a>; |
78 | |
79 | fn deref(&self) -> &Self::Target { |
80 | &self.0 |
81 | } |
82 | } |
83 | |
84 | impl<'a> core::ops::DerefMut for Shlex<'a> { |
85 | fn deref_mut(&mut self) -> &mut Self::Target { |
86 | &mut self.0 |
87 | } |
88 | } |
89 | |
90 | /// Convenience function that consumes the whole string at once. Returns None if the input was |
91 | /// erroneous. |
92 | pub fn split(in_str: &str) -> Option<Vec<String>> { |
93 | let mut shl: Shlex<'_> = Shlex::new(in_str); |
94 | let res: Vec = shl.by_ref().collect(); |
95 | if shl.had_error { None } else { Some(res) } |
96 | } |
97 | |
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// By default, the only error that can be returned is [`QuoteError::Nul`].  If you call
/// `allow_nul(true)`, then no errors can be returned at all.  Any error variants added in the
/// future will not be enabled by default; they will be enabled through corresponding non-default
/// [`Quoter`] options.
///
/// ...In theory.  In the unlikely event that additional classes of inputs are discovered that,
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// The input contained a nul byte.  In most cases, shells fundamentally [cannot handle strings
    /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted.  But if
    /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
    /// them pass through.
    Nul,
}

impl core::fmt::Display for QuoteError {
    /// Writes a short, human-readable description of the error to `f`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
        }
    }
}

// `std::error::Error` only exists when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
129 | |
130 | /// A more configurable interface to quote strings. If you only want the default settings you can |
131 | /// use the convenience functions [`try_quote`] and [`try_join`]. |
132 | /// |
133 | /// The bytes equivalent is [`bytes::Quoter`]. |
134 | #[derive (Default, Debug, Clone)] |
135 | pub struct Quoter { |
136 | inner: bytes::Quoter, |
137 | } |
138 | |
139 | impl Quoter { |
140 | /// Create a new [`Quoter`] with default settings. |
141 | #[inline ] |
142 | pub fn new() -> Self { |
143 | Self::default() |
144 | } |
145 | |
146 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not |
147 | /// allowed and will result in an error of [`QuoteError::Nul`]. |
148 | #[inline ] |
149 | pub fn allow_nul(mut self, allow: bool) -> Self { |
150 | self.inner = self.inner.allow_nul(allow); |
151 | self |
152 | } |
153 | |
154 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
155 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
156 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> { |
157 | // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. |
158 | self.inner.join(words.into_iter().map(|s| s.as_bytes())) |
159 | .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) |
160 | } |
161 | |
162 | /// Given a single word, return a string suitable to encode it as a shell argument. |
163 | pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> { |
164 | Ok(match self.inner.quote(in_str.as_bytes())? { |
165 | Cow::Borrowed(out) => { |
166 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
167 | unsafe { core::str::from_utf8_unchecked(out) }.into() |
168 | } |
169 | Cow::Owned(out) => { |
170 | // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
171 | unsafe { String::from_utf8_unchecked(out) }.into() |
172 | } |
173 | }) |
174 | } |
175 | } |
176 | |
177 | impl From<bytes::Quoter> for Quoter { |
178 | fn from(inner: bytes::Quoter) -> Quoter { |
179 | Quoter { inner } |
180 | } |
181 | } |
182 | |
183 | impl From<Quoter> for bytes::Quoter { |
184 | fn from(quoter: Quoter) -> bytes::Quoter { |
185 | quoter.inner |
186 | } |
187 | } |
188 | |
189 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
190 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
191 | /// |
192 | /// Uses default settings except that nul bytes are passed through, which [may be |
193 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
194 | /// |
195 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). |
196 | /// |
197 | /// (That configuration never returns `Err`, so this function does not panic.) |
198 | /// |
199 | /// The bytes equivalent is [bytes::join]. |
200 | #[deprecated (since = "1.3.0" , note = "replace with `try_join(words)?` to avoid nul byte danger" )] |
201 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { |
202 | Quoter::new().allow_nul(allow:true).join(words).unwrap() |
203 | } |
204 | |
205 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
206 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
207 | /// |
208 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
209 | /// |
210 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). |
211 | /// |
212 | /// The bytes equivalent is [bytes::try_join]. |
213 | pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> { |
214 | Quoter::new().join(words) |
215 | } |
216 | |
217 | /// Given a single word, return a string suitable to encode it as a shell argument. |
218 | /// |
219 | /// Uses default settings except that nul bytes are passed through, which [may be |
220 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
221 | /// |
222 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). |
223 | /// |
224 | /// (That configuration never returns `Err`, so this function does not panic.) |
225 | /// |
226 | /// The bytes equivalent is [bytes::quote]. |
227 | #[deprecated (since = "1.3.0" , note = "replace with `try_quote(str)?` to avoid nul byte danger" )] |
228 | pub fn quote(in_str: &str) -> Cow<str> { |
229 | Quoter::new().allow_nul(allow:true).quote(in_str).unwrap() |
230 | } |
231 | |
232 | /// Given a single word, return a string suitable to encode it as a shell argument. |
233 | /// |
234 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
235 | /// |
236 | /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). |
237 | /// |
238 | /// (That configuration never returns `Err`, so this function does not panic.) |
239 | /// |
240 | /// The bytes equivalent is [bytes::try_quote]. |
241 | pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> { |
242 | Quoter::new().quote(in_str) |
243 | } |
244 | |
// Table of `(input, expected)` pairs for `test_split`: `Some(words)` lists the words the input
// must split into, `None` means the input must be rejected as erroneous.
//
// Whitespace inside these literals is significant: a space next to a quote or backslash changes
// how the input is split, so each input must be kept exactly as written.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    // Quoted text adjacent to unquoted text stays part of the same word...
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    // ...whereas an unquoted space before the quote starts a new word.
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    // Backslash-newline is a line continuation, both unquoted and inside double quotes.
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    // Backslashes are literal inside single quotes.
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    // Dangling escapes and unterminated quotes are errors.
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    // `#` starts a comment only at the beginning of a word.
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
268 | |
/// Checks `split` against every `(input, expected)` pair in `SPLIT_TEST_ITEMS`.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        // Convert the borrowed expectation into the owned shape that `split` returns.
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), expected, "unexpected split result for input {:?}", input);
    }
}
275 | |
/// Checks that the lexer reports line 3 once it has yielded the word on the third input line.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new(" \nfoo \nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: the body reads `sh.line_no`, which would not be possible
    // while a `for` loop holds on to `sh`.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this, the test would pass vacuously if "bar" were never produced.
    assert!(saw_bar, "the word \"bar\" was never yielded");
}
285 | |
/// Checks `try_quote` against a table of (unquoted, quoted) pairs.
///
/// With the `std` feature every mismatch is printed before the test fails; without it the first
/// mismatch panics immediately (hence the `allow` for the then-unreachable bookkeeping).
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested.  (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    let tests = r#"
        <>              => <''>
        <foobar>        => <foobar>
        <foo bar>       => <'foo bar'>
        <"foo bar'">    => <"\"foo bar'\"">
        <'foo bar'>     => <"'foo bar'">
        <">             => <'"'>
        <"'>            => <"\"'">
        <hello!world>   => <'hello!world'>
        <'hello!world>  => <"'hello"'!world'>
        <'hello!>       => <"'hello"'!'>
        <hello ^ world> => <'hello ''^ world'>
        <hello^>        => <hello'^'>
        <!world'>       => <'!world'"'">
        <{a, b}>        => <'{a, b}'>
        <NL>            => <'NL'>
        <^>             => <'^'>
        <foo^bar>       => <foo'^bar'>
        <NLx^>          => <'NLx''^'>
        <NL^x>          => <'NL''^x'>
        <NL ^x>         => <'NL ''^x'>
        <{a,b}>         => <'{a,b}'>
        <a,b>           => <'a,b'>
        <a..b>          => <a..b>
        <'$>            => <"'"'$'>
        <"^>            => <'"''^'>
    "#;
    let mut ok = true;
    // Rows are split on real newlines first; only afterwards is "NL" expanded, so an expanded
    // newline can never be mistaken for a row separator.
    for test in tests.trim().split('\n') {
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2, "malformed test row: {:?}", test);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                   unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
344 | |
/// Checks the deprecated `join` on an empty list, an empty word, plain words, and a word that
/// needs quoting.
#[test]
#[allow(deprecated)]
fn test_join() {
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
353 | |
/// Checks that the fallible entry points report `QuoteError::Nul` for input containing a nul
/// byte.
#[test]
fn test_fallible() {
    let joined = try_join(vec![" \0"]);
    assert_eq!(joined.err(), Some(QuoteError::Nul));

    let quoted = try_quote(" \0");
    assert_eq!(quoted.err(), Some(QuoteError::Nul));
}
359 | |