1 | // Copyright 2015 Nicholas Allegra (comex). |
2 | // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or |
3 | // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be |
4 | // copied, modified, or distributed except according to those terms. |
5 | |
6 | //! [`Shlex`] and friends for byte strings. |
7 | //! |
8 | //! This is used internally by the [outer module](crate), and may be more |
9 | //! convenient if you are working with byte slices (`[u8]`) or types that are |
10 | //! wrappers around bytes, such as [`OsStr`](std::ffi::OsStr): |
11 | //! |
12 | //! ```rust |
13 | //! #[cfg(unix)] { |
14 | //! use shlex::bytes::quote; |
15 | //! use std::ffi::OsStr; |
16 | //! use std::os::unix::ffi::OsStrExt; |
17 | //! |
18 | //! // `\x80` is invalid in UTF-8. |
19 | //! let os_str = OsStr::from_bytes(b"a \x80b c" ); |
20 | //! assert_eq!(quote(os_str.as_bytes()), &b"'a \x80b c'" [..]); |
21 | //! } |
22 | //! ``` |
23 | //! |
24 | //! (On Windows, `OsStr` uses 16 bit wide characters so this will not work.) |
25 | |
26 | extern crate alloc; |
27 | use alloc::vec::Vec; |
28 | use alloc::borrow::Cow; |
29 | #[cfg (test)] |
30 | use alloc::vec; |
31 | #[cfg (test)] |
32 | use alloc::borrow::ToOwned; |
33 | #[cfg (all(doc, not(doctest)))] |
34 | use crate::{self as shlex, quoting_warning}; |
35 | |
36 | use super::QuoteError; |
37 | |
/// An iterator that takes an input byte string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    /// Remaining, not yet consumed input bytes.
    in_iter: core::slice::Iter<'a, u8>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a splitter over `in_bytes`, starting at line 1 with no error recorded.
    pub fn new(in_bytes: &'a [u8]) -> Self {
        Shlex {
            in_iter: in_bytes.iter(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been consumed.
    ///
    /// Returns `None` and sets `had_error` if the input ends inside a quotation or directly
    /// after a backslash; the partially built word is discarded in that case.
    fn parse_word(&mut self, mut ch: u8) -> Option<Vec<u8>> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            let step = match ch {
                b'"' => self.parse_double(&mut result),
                b'\'' => self.parse_single(&mut result),
                b'\\' => match self.next_char() {
                    // \<newline> => nothing (line continuation)
                    Some(b'\n') => Ok(()),
                    Some(escaped) => {
                        result.push(escaped);
                        Ok(())
                    }
                    // Input ended right after the backslash.
                    None => Err(()),
                },
                // Unquoted whitespace terminates the word.
                b' ' | b'\t' | b'\n' => break,
                _ => {
                    result.push(ch);
                    Ok(())
                }
            };
            if step.is_err() {
                self.had_error = true;
                return None;
            }
            match self.next_char() {
                Some(next) => ch = next,
                None => break,
            }
        }
        Some(result)
    }

    /// Consumes the body of a double-quoted section (the opening `"` is already consumed),
    /// appending the unescaped bytes to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing `"`.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'"' => return Ok(()),
                b'\\' => {
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x (the backslash stays literal)
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    }
                }
                other => result.push(other),
            }
        }
    }

    /// Consumes the body of a single-quoted section (the opening `'` is already consumed),
    /// appending every byte up to the closing `'` to `result` verbatim.
    ///
    /// Returns `Err(())` if the input ends before the closing `'`.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'\'' => return Ok(()),
                other => result.push(other),
            }
        }
    }

    /// Returns the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next().copied();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = Vec<u8>;

    /// Yields the next word, or `None` at end of input or after an error (see `had_error`).
    fn next(&mut self) -> Option<Self::Item> {
        let mut ch = self.next_char()?;
        // Skip leading whitespace and `#` comments.
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // A comment runs up to and including the next newline (or end of input).
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}
157 | |
158 | /// Convenience function that consumes the whole byte string at once. Returns None if the input was |
159 | /// erroneous. |
160 | pub fn split(in_bytes: &[u8]) -> Option<Vec<Vec<u8>>> { |
161 | let mut shl: Shlex<'_> = Shlex::new(in_bytes); |
162 | let res: Vec> = shl.by_ref().collect(); |
163 | if shl.had_error { None } else { Some(res) } |
164 | } |
165 | |
/// A more configurable interface to quote strings. If you only want the default settings you can
/// use the convenience functions [`try_quote`] and [`try_join`].
///
/// The string equivalent is [`shlex::Quoter`].
#[derive(Default, Debug, Clone)]
pub struct Quoter {
    /// Whether nul bytes are accepted in the input; `false` by default.
    allow_nul: bool,
    // TODO: more options
}
175 | |
176 | impl Quoter { |
177 | /// Create a new [`Quoter`] with default settings. |
178 | #[inline ] |
179 | pub fn new() -> Self { |
180 | Self::default() |
181 | } |
182 | |
183 | /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not |
184 | /// allowed and will result in an error of [`QuoteError::Nul`]. |
185 | #[inline ] |
186 | pub fn allow_nul(mut self, allow: bool) -> Self { |
187 | self.allow_nul = allow; |
188 | self |
189 | } |
190 | |
191 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, |
192 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
193 | pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(&self, words: I) -> Result<Vec<u8>, QuoteError> { |
194 | Ok(words.into_iter() |
195 | .map(|word| self.quote(word)) |
196 | .collect::<Result<Vec<Cow<[u8]>>, QuoteError>>()? |
197 | .join(&b' ' )) |
198 | } |
199 | |
200 | /// Given a single word, return a byte string suitable to encode it as a shell argument. |
201 | /// |
202 | /// If given valid UTF-8, this will never produce invalid UTF-8. This is because it only |
203 | /// ever inserts valid ASCII characters before or after existing ASCII characters (or |
204 | /// returns two single quotes if the input was an empty string). It will never modify a |
205 | /// multibyte UTF-8 character. |
206 | pub fn quote<'a>(&self, mut in_bytes: &'a [u8]) -> Result<Cow<'a, [u8]>, QuoteError> { |
207 | if in_bytes.is_empty() { |
208 | // Empty string. Special case that isn't meaningful as only part of a word. |
209 | return Ok(b"''" [..].into()); |
210 | } |
211 | if !self.allow_nul && in_bytes.iter().any(|&b| b == b' \0' ) { |
212 | return Err(QuoteError::Nul); |
213 | } |
214 | let mut out: Vec<u8> = Vec::new(); |
215 | while !in_bytes.is_empty() { |
216 | // Pick a quoting strategy for some prefix of the input. Normally this will cover the |
217 | // entire input, but in some case we might need to divide the input into multiple chunks |
218 | // that are quoted differently. |
219 | let (cur_len, strategy) = quoting_strategy(in_bytes); |
220 | if cur_len == in_bytes.len() && strategy == QuotingStrategy::Unquoted && out.is_empty() { |
221 | // Entire string can be represented unquoted. Reuse the allocation. |
222 | return Ok(in_bytes.into()); |
223 | } |
224 | let (cur_chunk, rest) = in_bytes.split_at(cur_len); |
225 | assert!(rest.len() < in_bytes.len()); // no infinite loop |
226 | in_bytes = rest; |
227 | append_quoted_chunk(&mut out, cur_chunk, strategy); |
228 | } |
229 | Ok(out.into()) |
230 | } |
231 | |
232 | } |
233 | |
/// The ways a chunk of input can be rendered in the output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QuotingStrategy {
    /// No quotes and no backslash escapes. (If backslash escapes would be necessary, we use a
    /// different strategy instead.)
    Unquoted,
    /// Single quoted.
    SingleQuoted,
    /// Double quotes, potentially with backslash escapes.
    DoubleQuoted,
    // TODO: add $'xxx' and "$(printf 'xxx')" styles
}
245 | |
/// Is this byte okay to emit unquoted?
///
/// Every ASCII byte is classified explicitly (the match has no catch-all arm), so a byte cannot be
/// forgotten by accident. Non-ASCII bytes (`0x80..=0xff`) always return `false`.
const fn unquoted_ok(c: u8) -> bool {
    match c {
        // Allowed characters:
        b'+' | b'-' | b'.' | b'/' | b':' | b'@' | b']' | b'_' |
        b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'
        => true,

        // Non-allowed characters:
        // From POSIX https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
        // "The application shall quote the following characters if they are to represent themselves:"
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\'' | b' ' | b'\t' | b'\n' |
        // "and the following may need to be quoted under certain circumstances[..]:"
        b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%' |
        // Brace expansion. These ought to be in the POSIX list but aren't yet;
        // see: https://www.austingroupbugs.net/view.php?id=1193
        b'{' | b'}' |
        // Also quote comma, just to be safe in the extremely odd case that the user of this crate
        // is intentionally placing a quoted string inside a brace expansion, e.g.:
        //     format!("echo foo{{a,b,{}}}", shlex::quote(some_str))
        b',' |
        // '\r' is allowed in a word by all real shells I tested, but is treated as a word
        // separator by Python `shlex`, and might be translated to '\n' in interactive mode.
        b'\r' |
        // '!' and '^' are treated specially in interactive mode; see quoting_warning.
        b'!' | b'^' |
        // Nul bytes and control characters.
        0x00..=0x1f | 0x7f
        => false,

        // Non-ASCII bytes. Callers in this file handle these separately in `quoting_strategy`
        // and only pass 0..128 here, but answer "needs quoting" rather than recursing forever
        // if one ever arrives.
        0x80..=0xff => false,
    }
    // Note: The logic cited above for quoting comma might suggest that `..` should also be quoted
    // (since it is a special case of brace expansion). But it's not necessary. There are three
    // cases:
    //
    // 1. The user wants comma-based brace expansion, but the untrusted string being `quote`d
    //    contains `..`, so they get something like `{foo,bar,3..5}`.
    //  => That's safe; both Bash and Zsh expand this to `foo bar 3..5` rather than
    //     `foo bar 3 4 5`. The presence of commas disables sequence expression expansion.
    //
    // 2. The user wants comma-based brace expansion where the contents of the braces are a
    //    variable number of `quote`d strings and nothing else. There happens to be exactly
    //    one string and it contains `..`, so they get something like `{3..5}`.
    //  => Then this will expand as a sequence expression, which is unintended. But I don't mind,
    //     because any such code is already buggy. Suppose the untrusted string *didn't* contain
    //     `,` or `..`, resulting in shell input like `{foo}`. Then the shell would interpret it
    //     as the literal string `{foo}` rather than brace-expanding it into `foo`.
    //
    // 3. The user wants a sequence expression and wants to supply an untrusted string as one of
    //    the endpoints or the increment.
    //  => Well, that's just silly, since the endpoints can only be numbers or single letters.
}
302 | |
303 | /// Optimized version of `unquoted_ok`. |
304 | fn unquoted_ok_fast(c: u8) -> bool { |
305 | const UNQUOTED_OK_MASK: u128 = { |
306 | // Make a mask of all bytes in 0..<0x80 that pass. |
307 | let mut c: u8 = 0u8; |
308 | let mut mask: u128 = 0u128; |
309 | while c < 0x80 { |
310 | if unquoted_ok(c) { |
311 | mask |= 1u128 << c; |
312 | } |
313 | c += 1; |
314 | } |
315 | mask |
316 | }; |
317 | ((UNQUOTED_OK_MASK >> c) & 1) != 0 |
318 | } |
319 | |
/// Is this ASCII byte okay to emit in single quotes?
///
/// Rejected bytes:
/// - `'`: no single quotes in single quotes.
/// - `^`: to work around a Bash bug, `^` is only allowed right after an opening single quote;
///   see quoting_warning.
/// - `\`: backslashes in single quotes are literal according to POSIX, but Fish treats them as an
///   escape character. Ban them. Fish doesn't aim to be POSIX-compatible, but we *can* achieve
///   Fish compatibility using double quotes, so we might as well.
fn single_quoted_ok(c: u8) -> bool {
    !matches!(c, b'\'' | b'^' | b'\\')
}
335 | |
/// Is this ASCII byte okay to emit in double quotes?
///
/// Rejected bytes:
/// - `` ` `` and `$`: work around a Python `shlex` bug where parsing "\`" and "\$" doesn't strip
///   the backslash, even though POSIX requires it.
/// - `!` and `^`: treated specially in interactive mode; see quoting_warning.
fn double_quoted_ok(c: u8) -> bool {
    !matches!(c, b'`' | b'$' | b'!' | b'^')
}
347 | |
348 | /// Given an input, return a quoting strategy that can cover some prefix of the string, along with |
349 | /// the size of that prefix. |
350 | /// |
351 | /// Precondition: input size is nonzero. (Empty strings are handled by the caller.) |
352 | /// Postcondition: returned size is nonzero. |
353 | #[cfg_attr (manual_codegen_check, inline(never))] |
354 | fn quoting_strategy(in_bytes: &[u8]) -> (usize, QuotingStrategy) { |
355 | const UNQUOTED_OK: u8 = 1; |
356 | const SINGLE_QUOTED_OK: u8 = 2; |
357 | const DOUBLE_QUOTED_OK: u8 = 4; |
358 | |
359 | let mut prev_ok = SINGLE_QUOTED_OK | DOUBLE_QUOTED_OK | UNQUOTED_OK; |
360 | let mut i = 0; |
361 | |
362 | if in_bytes[0] == b'^' { |
363 | // To work around a Bash bug, ^ is only allowed right after an opening single quote; see |
364 | // quoting_warning. |
365 | prev_ok = SINGLE_QUOTED_OK; |
366 | i = 1; |
367 | } |
368 | |
369 | while i < in_bytes.len() { |
370 | let c = in_bytes[i]; |
371 | let mut cur_ok = prev_ok; |
372 | |
373 | if c >= 0x80 { |
374 | // Normally, non-ASCII characters shouldn't require quoting, but see quoting_warning.md |
375 | // about \xa0. For now, just treat all non-ASCII characters as requiring quotes. This |
376 | // also ensures things are safe in the off-chance that you're in a legacy 8-bit locale that |
377 | // has additional characters satisfying `isblank`. |
378 | cur_ok &= !UNQUOTED_OK; |
379 | } else { |
380 | if !unquoted_ok_fast(c) { |
381 | cur_ok &= !UNQUOTED_OK; |
382 | } |
383 | if !single_quoted_ok(c){ |
384 | cur_ok &= !SINGLE_QUOTED_OK; |
385 | } |
386 | if !double_quoted_ok(c) { |
387 | cur_ok &= !DOUBLE_QUOTED_OK; |
388 | } |
389 | } |
390 | |
391 | if cur_ok == 0 { |
392 | // There are no quoting strategies that would work for both the previous characters and |
393 | // this one. So we have to end the chunk before this character. The caller will call |
394 | // `quoting_strategy` again to handle the rest of the string. |
395 | break; |
396 | } |
397 | |
398 | prev_ok = cur_ok; |
399 | i += 1; |
400 | } |
401 | |
402 | // Pick the best allowed strategy. |
403 | let strategy = if prev_ok & UNQUOTED_OK != 0 { |
404 | QuotingStrategy::Unquoted |
405 | } else if prev_ok & SINGLE_QUOTED_OK != 0 { |
406 | QuotingStrategy::SingleQuoted |
407 | } else if prev_ok & DOUBLE_QUOTED_OK != 0 { |
408 | QuotingStrategy::DoubleQuoted |
409 | } else { |
410 | unreachable!() |
411 | }; |
412 | debug_assert!(i > 0); |
413 | (i, strategy) |
414 | } |
415 | |
416 | fn append_quoted_chunk(out: &mut Vec<u8>, cur_chunk: &[u8], strategy: QuotingStrategy) { |
417 | match strategy { |
418 | QuotingStrategy::Unquoted => { |
419 | out.extend_from_slice(cur_chunk); |
420 | }, |
421 | QuotingStrategy::SingleQuoted => { |
422 | out.reserve(cur_chunk.len() + 2); |
423 | out.push(b' \'' ); |
424 | out.extend_from_slice(cur_chunk); |
425 | out.push(b' \'' ); |
426 | }, |
427 | QuotingStrategy::DoubleQuoted => { |
428 | out.reserve(cur_chunk.len() + 2); |
429 | out.push(b'"' ); |
430 | for &c in cur_chunk.into_iter() { |
431 | if let b'$' | b'`' | b'"' | b' \\' = c { |
432 | // Add a preceding backslash. |
433 | // Note: We shouldn't actually get here for $ and ` because they don't pass |
434 | // `double_quoted_ok`. |
435 | out.push(b' \\' ); |
436 | } |
437 | // Add the character itself. |
438 | out.push(c); |
439 | } |
440 | out.push(b'"' ); |
441 | }, |
442 | } |
443 | } |
444 | |
445 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, |
446 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
447 | /// |
448 | /// Uses default settings except that nul bytes are passed through, which [may be |
449 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
450 | /// |
451 | /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). |
452 | /// |
453 | /// (That configuration never returns `Err`, so this function does not panic.) |
454 | /// |
455 | /// The string equivalent is [shlex::join]. |
456 | #[deprecated (since = "1.3.0" , note = "replace with `try_join(words)?` to avoid nul byte danger" )] |
457 | pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Vec<u8> { |
458 | Quoter::new().allow_nul(allow:true).join(words).unwrap() |
459 | } |
460 | |
461 | /// Convenience function that consumes an iterable of words and turns it into a single byte string, |
462 | /// quoting words when necessary. Consecutive words will be separated by a single space. |
463 | /// |
464 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
465 | /// |
466 | /// Equivalent to [`Quoter::new().join(words)`](Quoter). |
467 | /// |
468 | /// The string equivalent is [shlex::try_join]. |
469 | pub fn try_join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Result<Vec<u8>, QuoteError> { |
470 | Quoter::new().join(words) |
471 | } |
472 | |
473 | /// Given a single word, return a string suitable to encode it as a shell argument. |
474 | /// |
475 | /// Uses default settings except that nul bytes are passed through, which [may be |
476 | /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
477 | /// |
478 | /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_bytes).unwrap()`](Quoter). |
479 | /// |
480 | /// (That configuration never returns `Err`, so this function does not panic.) |
481 | /// |
482 | /// The string equivalent is [shlex::quote]. |
483 | #[deprecated (since = "1.3.0" , note = "replace with `try_quote(str)?` to avoid nul byte danger" )] |
484 | pub fn quote(in_bytes: &[u8]) -> Cow<[u8]> { |
485 | Quoter::new().allow_nul(allow:true).quote(in_bytes).unwrap() |
486 | } |
487 | |
488 | /// Given a single word, return a string suitable to encode it as a shell argument. |
489 | /// |
490 | /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
491 | /// |
492 | /// Equivalent to [`Quoter::new().quote(in_bytes)`](Quoter). |
493 | /// |
494 | /// (That configuration never returns `Err`, so this function does not panic.) |
495 | /// |
496 | /// The string equivalent is [shlex::try_quote]. |
497 | pub fn try_quote(in_bytes: &[u8]) -> Result<Cow<[u8]>, QuoteError> { |
498 | Quoter::new().quote(in_bytes) |
499 | } |
500 | |
// A lone continuation byte: not valid UTF-8 on its own.
#[cfg(test)]
const INVALID_UTF8: &[u8] = b"\xa1";
// The same byte wrapped in single quotes, i.e. the expected quoted form of `INVALID_UTF8`.
#[cfg(test)]
const INVALID_UTF8_SINGLEQUOTED: &[u8] = b"'\xa1'";
505 | |
// The lint is silenced on purpose: this test deliberately feeds known-invalid bytes to
// `from_utf8`.
#[test]
#[allow(invalid_from_utf8)]
fn test_invalid_utf8() {
    // Check that our test string is actually invalid UTF-8.
    assert!(core::str::from_utf8(INVALID_UTF8).is_err());
}
512 | |
// Table of `(input, expected)` pairs for `split`; `None` means the input must be rejected.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &'static [(&'static [u8], Option<&'static [&'static [u8]]>)] = &[
    (b"foo$baz", Some(&[b"foo$baz"])),
    (b"foo baz", Some(&[b"foo", b"baz"])),
    (b"foo\"bar\"baz", Some(&[b"foobarbaz"])),
    (b"foo \"bar\"baz", Some(&[b"foo", b"barbaz"])),
    (b"   foo \nbar", Some(&[b"foo", b"bar"])),
    (b"foo\\\nbar", Some(&[b"foobar"])),
    (b"\"foo\\\nbar\"", Some(&[b"foobar"])),
    (b"'baz\\$b'", Some(&[b"baz\\$b"])),
    (b"'baz\\\''", None),
    (b"\\", None),
    (b"\"\\", None),
    (b"'\\", None),
    (b"\"", None),
    (b"'", None),
    (b"foo #bar\nbaz", Some(&[b"foo", b"baz"])),
    (b"foo #bar", Some(&[b"foo"])),
    (b"foo#bar", Some(&[b"foo#bar"])),
    (b"foo\"#bar", None),
    (b"'\\n'", Some(&[b"\\n"])),
    (b"'\\\\n'", Some(&[b"\\\\n"])),
    (INVALID_UTF8, Some(&[INVALID_UTF8])),
];
537 | |
// Runs every row of `SPLIT_TEST_ITEMS` through `split`.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let expected: Option<Vec<Vec<u8>>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        // Name the offending row so a failure is easy to locate in the table.
        assert_eq!(split(input), expected, "input: {:?}", input);
    }
}
544 | |
// Checks that `line_no` has advanced to 3 by the time the word on the third line is returned.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new(b"\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh.line_no` is read between calls to `next`.
    while let Some(word) = sh.next() {
        if word == b"bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Guard against the check above being skipped entirely.
    assert!(saw_bar);
}
554 | |
// `quote` is deprecated but still has to behave; silence the deprecation warning for the test.
#[test]
#[allow(deprecated)]
fn test_quote() {
    // Validate behavior with invalid UTF-8:
    assert_eq!(quote(INVALID_UTF8), INVALID_UTF8_SINGLEQUOTED);
    // Replicate a few tests from lib.rs. No need to replicate all of them.
    assert_eq!(quote(b""), &b"''"[..]);
    assert_eq!(quote(b"foobar"), &b"foobar"[..]);
    assert_eq!(quote(b"foo bar"), &b"'foo bar'"[..]);
    // `'"` cannot be single-quoted, so it is double-quoted with the `"` escaped.
    assert_eq!(quote(b"'\""), &b"\"'\\\"\""[..]);
}
567 | |
// `join` is deprecated but still has to behave; silence the deprecation warning for the test.
#[test]
#[allow(deprecated)]
fn test_join() {
    // Validate behavior with invalid UTF-8:
    assert_eq!(join(vec![INVALID_UTF8]), INVALID_UTF8_SINGLEQUOTED);
    // Replicate a few tests from lib.rs. No need to replicate all of them.
    assert_eq!(join(vec![]), &b""[..]);
    assert_eq!(join(vec![&b""[..]]), b"''");
}
577 | |