1 | // Copyright 2015 Nicholas Allegra (comex). |
2 | // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or |
3 | // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be |
4 | // copied, modified, or distributed except according to those terms. |
5 | |
6 | //! Same idea as (but implementation not directly based on) the Python shlex module. However, this |
7 | //! implementation does not support any of the Python module's customization because it makes |
8 | //! parsing slower and is fairly useless. You only get the default settings of shlex.split, which |
9 | //! mimic the POSIX shell: |
10 | //! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html> |
11 | //! |
12 | //! This implementation also deviates from the Python version in not treating `\r` specially, which |
13 | //! I believe is more compliant. |
14 | //! |
15 | //! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes |
16 | //! directly as a micro-optimization. |
17 | //! |
18 | //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in |
19 | //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. |
20 | |
21 | #![cfg_attr (not(feature = "std" ), no_std)] |
22 | |
23 | extern crate alloc; |
24 | use alloc::vec::Vec; |
25 | use alloc::borrow::Cow; |
26 | use alloc::string::String; |
27 | #[cfg (test)] |
28 | use alloc::vec; |
29 | #[cfg (test)] |
30 | use alloc::borrow::ToOwned; |
31 | |
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    /// Bytes of the input that have not been consumed yet.
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer positioned at the start of `in_str`, on line 1 and with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been read.
    ///
    /// Consumes input up to and including the unquoted space, tab or newline that terminates the
    /// word (or up to the end of input). Returns `None` and sets `had_error` if the input ends
    /// inside a quotation or directly after a backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // Backslash-newline is a line continuation: both bytes are dropped.
                    Some(b'\n') => {}
                    // Any other escaped byte is taken literally.
                    Some(escaped) => result.push(escaped),
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                other => result.push(other),
            }
            match self.next_char() {
                Some(next) => ch = next,
                None => break,
            }
        }
        // SAFETY: the input comes from a `&str`, so it is valid UTF-8. `result` is that input
        // with only ASCII bytes removed (quotes, backslashes, newlines), and the word is only
        // ever cut at an ASCII whitespace byte, so no multi-byte sequence is split or altered.
        unsafe { Some(String::from_utf8_unchecked(result)) }
    }

    /// Consumes the body of a double-quoted section (the opening `"` has already been read) up
    /// to and including the closing `"`, appending the unescaped bytes to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(byte) = self.next_char() {
            match byte {
                b'\\' => {
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // \$ => $  (likewise for `, " and \)
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x  (any other byte keeps its backslash)
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    }
                }
                b'"' => return Ok(()),
                _ => result.push(byte),
            }
        }
        Err(())
    }

    /// Consumes the body of a single-quoted section (the opening `'` has already been read) up
    /// to and including the closing `'`, appending every byte in between to `result` verbatim.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(byte) = self.next_char() {
            if byte == b'\'' {
                return Ok(());
            }
            result.push(byte);
        }
        Err(())
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}
126 | |
127 | impl<'a> Iterator for Shlex<'a> { |
128 | type Item = String; |
129 | fn next(&mut self) -> Option<String> { |
130 | if let Some(mut ch: u8) = self.next_char() { |
131 | // skip initial whitespace |
132 | loop { |
133 | match ch as char { |
134 | ' ' | ' \t' | ' \n' => {}, |
135 | '#' => { |
136 | while let Some(ch2: u8) = self.next_char() { |
137 | if ch2 as char == ' \n' { break; } |
138 | } |
139 | }, |
140 | _ => { break; } |
141 | } |
142 | if let Some(ch2: u8) = self.next_char() { ch = ch2; } else { return None; } |
143 | } |
144 | self.parse_word(ch) |
145 | } else { // no initial character |
146 | None |
147 | } |
148 | } |
149 | |
150 | } |
151 | |
152 | /// Convenience function that consumes the whole string at once. Returns None if the input was |
153 | /// erroneous. |
154 | pub fn split(in_str: &str) -> Option<Vec<String>> { |
155 | let mut shl: Shlex<'_> = Shlex::new(in_str); |
156 | let res: Vec = shl.by_ref().collect(); |
157 | if shl.had_error { None } else { Some(res) } |
158 | } |
159 | |
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * The empty string becomes `""`.
/// * A word containing any character from the special set below is wrapped in double quotes,
///   with `$`, `` ` ``, `"` and `\` preceded by a backslash.
/// * Any other word is returned unchanged, borrowed from the input.
pub fn quote(in_str: &str) -> Cow<str> {
    /// True for bytes that force the whole word to be quoted.
    ///
    /// `{` and `}` are part of the set because shells with brace expansion (e.g. Bash) would
    /// otherwise rewrite an unquoted word such as `a{b,c}`.
    fn needs_quoting(byte: u8) -> bool {
        match byte {
            b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"'
            | b'\'' | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'='
            | b'%' | b'{' | b'}' => true,
            _ => false,
        }
    }

    if in_str.is_empty() {
        return "\"\"".into();
    }
    if !in_str.bytes().any(needs_quoting) {
        return in_str.into();
    }

    // Two surrounding quotes plus the input; escapes may still grow the buffer further.
    let mut out: Vec<u8> = Vec::with_capacity(in_str.len() + 2);
    out.push(b'"');
    for byte in in_str.bytes() {
        // These are the only characters that stay special inside double quotes.
        match byte {
            b'$' | b'`' | b'"' | b'\\' => out.push(b'\\'),
            _ => {}
        }
        out.push(byte);
    }
    out.push(b'"');
    // SAFETY: `out` is the valid UTF-8 input with ASCII bytes (`"` and `\`) inserted only
    // immediately before ASCII bytes or at either end, so no multi-byte sequence is split.
    unsafe { String::from_utf8_unchecked(out) }.into()
}
184 | |
185 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
186 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
187 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { |
188 | words.into_iter() |
189 | .map(quote) |
190 | .collect::<Vec<_>>() |
191 | .join(sep:" " ) |
192 | } |
193 | |
// Table of `(input, expected)` pairs for `test_split`. `None` marks an input that must be
// rejected as erroneous; `Some(words)` lists the words the input must split into.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
217 | |
// Runs every `(input, expected)` pair of SPLIT_TEST_ITEMS through `split`.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        // Name the offending input so a failing row of the table is easy to find.
        assert_eq!(split(input), expected, "input: {:?}", input);
    }
}
224 | |
// Checks that `line_no` has counted both newlines by the time the last word is returned.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // A `for` loop would keep `sh` borrowed for the whole loop and block reading `sh.line_no`.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this the test would pass vacuously if `bar` were never produced.
    assert!(saw_bar, "expected the word `bar` to be produced");
}
234 | |
// Covers the three outcomes of `quote`: unchanged word, double-quoted word (with and without
// an escaped character), and the empty string.
#[test]
fn test_quote() {
    assert_eq!(quote("foobar"), "foobar");
    assert_eq!(quote("foo bar"), "\"foo bar\"");
    assert_eq!(quote("\""), "\"\\\"\"");
    assert_eq!(quote(""), "\"\"");
}
242 | |
// Checks that `join` quotes each word independently and separates words with one space.
#[test]
fn test_join() {
    assert_eq!(join(vec![]), "");
    assert_eq!(join(vec![""]), "\"\"");
    assert_eq!(join(vec!["a", "b"]), "a b");
    assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
}
250 | |