1 | /* gshell.c - Shell-related utilities |
2 | * |
3 | * Copyright 2000 Red Hat, Inc. |
4 | * g_execvpe implementation based on GNU libc execvp: |
5 | * Copyright 1991, 92, 95, 96, 97, 98, 99 Free Software Foundation, Inc. |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public License |
18 | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | */ |
20 | |
21 | #include "config.h" |
22 | |
23 | #include <string.h> |
24 | |
25 | #include "gshell.h" |
26 | |
27 | #include "gslist.h" |
28 | #include "gstrfuncs.h" |
29 | #include "gstring.h" |
30 | #include "gtestutils.h" |
31 | #include "glibintl.h" |
32 | #include "gthread.h" |
33 | |
34 | /** |
35 | * SECTION:shell |
36 | * @title: Shell-related Utilities |
37 | * @short_description: shell-like commandline handling |
38 | * |
39 | * GLib provides the functions g_shell_quote() and g_shell_unquote() |
40 | * to handle shell-like quoting in strings. The function g_shell_parse_argv() |
41 | * parses a string similar to the way a POSIX shell (/bin/sh) would. |
42 | * |
43 | * Note that string handling in shells has many obscure and historical |
44 | * corner-cases which these functions do not necessarily reproduce. They |
45 | * are good enough in practice, though. |
46 | */ |
47 | |
48 | /** |
49 | * G_SHELL_ERROR: |
50 | * |
51 | * Error domain for shell functions. Errors in this domain will be from |
52 | * the #GShellError enumeration. See #GError for information on error |
53 | * domains. |
54 | **/ |
55 | |
56 | /** |
57 | * GShellError: |
58 | * @G_SHELL_ERROR_BAD_QUOTING: Mismatched or otherwise mangled quoting. |
59 | * @G_SHELL_ERROR_EMPTY_STRING: String to be parsed was empty. |
60 | * @G_SHELL_ERROR_FAILED: Some other error. |
61 | * |
62 | * Error codes returned by shell functions. |
63 | **/ |
/* Per GLib's G_DEFINE_QUARK macro this is expected to define
 * g_shell_error_quark(), the quark behind the G_SHELL_ERROR domain
 * (NOTE(review): expansion is not visible in this file - confirm). */
64 | G_DEFINE_QUARK (g-shell-error-quark, g_shell_error) |
65 | |
66 | /* Single quotes preserve the literal string exactly. escape |
67 | * sequences are not allowed; not even \' - if you want a ' |
68 | * in the quoted text, you have to do something like 'foo'\''bar' |
69 | * |
70 | * Double quotes allow $ ` " \ and newline to be escaped with backslash. |
71 | * Otherwise double quotes preserve things literally. |
72 | */ |
73 | |
74 | static gboolean |
75 | unquote_string_inplace (gchar* str, gchar** end, GError** err) |
76 | { |
77 | gchar* dest; |
78 | gchar* s; |
79 | gchar quote_char; |
80 | |
81 | g_return_val_if_fail(end != NULL, FALSE); |
82 | g_return_val_if_fail(err == NULL || *err == NULL, FALSE); |
83 | g_return_val_if_fail(str != NULL, FALSE); |
84 | |
85 | dest = s = str; |
86 | |
87 | quote_char = *s; |
88 | |
89 | if (!(*s == '"' || *s == '\'')) |
90 | { |
91 | g_set_error_literal (err, |
92 | G_SHELL_ERROR, |
93 | code: G_SHELL_ERROR_BAD_QUOTING, |
94 | _("Quoted text doesn’t begin with a quotation mark" )); |
95 | *end = str; |
96 | return FALSE; |
97 | } |
98 | |
99 | /* Skip the initial quote mark */ |
100 | ++s; |
101 | |
102 | if (quote_char == '"') |
103 | { |
104 | while (*s) |
105 | { |
106 | g_assert(s > dest); /* loop invariant */ |
107 | |
108 | switch (*s) |
109 | { |
110 | case '"': |
111 | /* End of the string, return now */ |
112 | *dest = '\0'; |
113 | ++s; |
114 | *end = s; |
115 | return TRUE; |
116 | break; |
117 | |
118 | case '\\': |
119 | /* Possible escaped quote or \ */ |
120 | ++s; |
121 | switch (*s) |
122 | { |
123 | case '"': |
124 | case '\\': |
125 | case '`': |
126 | case '$': |
127 | case '\n': |
128 | *dest = *s; |
129 | ++s; |
130 | ++dest; |
131 | break; |
132 | |
133 | default: |
134 | /* not an escaped char */ |
135 | *dest = '\\'; |
136 | ++dest; |
137 | /* ++s already done. */ |
138 | break; |
139 | } |
140 | break; |
141 | |
142 | default: |
143 | *dest = *s; |
144 | ++dest; |
145 | ++s; |
146 | break; |
147 | } |
148 | |
149 | g_assert(s > dest); /* loop invariant */ |
150 | } |
151 | } |
152 | else |
153 | { |
154 | while (*s) |
155 | { |
156 | g_assert(s > dest); /* loop invariant */ |
157 | |
158 | if (*s == '\'') |
159 | { |
160 | /* End of the string, return now */ |
161 | *dest = '\0'; |
162 | ++s; |
163 | *end = s; |
164 | return TRUE; |
165 | } |
166 | else |
167 | { |
168 | *dest = *s; |
169 | ++dest; |
170 | ++s; |
171 | } |
172 | |
173 | g_assert(s > dest); /* loop invariant */ |
174 | } |
175 | } |
176 | |
177 | /* If we reach here this means the close quote was never encountered */ |
178 | |
179 | *dest = '\0'; |
180 | |
181 | g_set_error_literal (err, |
182 | G_SHELL_ERROR, |
183 | code: G_SHELL_ERROR_BAD_QUOTING, |
184 | _("Unmatched quotation mark in command line or other shell-quoted text" )); |
185 | *end = s; |
186 | return FALSE; |
187 | } |
188 | |
189 | /** |
190 | * g_shell_quote: |
191 | * @unquoted_string: (type filename): a literal string |
192 | * |
193 | * Quotes a string so that the shell (/bin/sh) will interpret the |
194 | * quoted string to mean @unquoted_string. If you pass a filename to |
195 | * the shell, for example, you should first quote it with this |
196 | * function. The return value must be freed with g_free(). The |
197 | * quoting style used is undefined (single or double quotes may be |
198 | * used). |
199 | * |
200 | * Returns: (type filename): quoted string |
201 | **/ |
202 | gchar* |
203 | g_shell_quote (const gchar *unquoted_string) |
204 | { |
205 | /* We always use single quotes, because the algorithm is cheesier. |
206 | * We could use double if we felt like it, that might be more |
207 | * human-readable. |
208 | */ |
209 | |
210 | const gchar *p; |
211 | GString *dest; |
212 | |
213 | g_return_val_if_fail (unquoted_string != NULL, NULL); |
214 | |
215 | dest = g_string_new (init: "'" ); |
216 | |
217 | p = unquoted_string; |
218 | |
219 | /* could speed this up a lot by appending chunks of text at a |
220 | * time. |
221 | */ |
222 | while (*p) |
223 | { |
224 | /* Replace literal ' with a close ', a \', and an open ' */ |
225 | if (*p == '\'') |
226 | g_string_append (string: dest, val: "'\\''" ); |
227 | else |
228 | g_string_append_c (dest, *p); |
229 | |
230 | ++p; |
231 | } |
232 | |
233 | /* close the quote */ |
234 | g_string_append_c (dest, '\''); |
235 | |
236 | return g_string_free (string: dest, FALSE); |
237 | } |
238 | |
239 | /** |
240 | * g_shell_unquote: |
241 | * @quoted_string: (type filename): shell-quoted string |
242 | * @error: error return location or NULL |
243 | * |
244 | * Unquotes a string as the shell (/bin/sh) would. Only handles |
245 | * quotes; if a string contains file globs, arithmetic operators, |
246 | * variables, backticks, redirections, or other special-to-the-shell |
247 | * features, the result will be different from the result a real shell |
248 | * would produce (the variables, backticks, etc. will be passed |
249 | * through literally instead of being expanded). This function is |
250 | * guaranteed to succeed if applied to the result of |
251 | * g_shell_quote(). If it fails, it returns %NULL and sets the |
252 | * error. The @quoted_string need not actually contain quoted or |
253 | * escaped text; g_shell_unquote() simply goes through the string and |
254 | * unquotes/unescapes anything that the shell would. Both single and |
255 | * double quotes are handled, as are escapes including escaped |
256 | * newlines. The return value must be freed with g_free(). Possible |
257 | * errors are in the #G_SHELL_ERROR domain. |
258 | * |
259 | * Shell quoting rules are a bit strange. Single quotes preserve the |
260 | * literal string exactly. escape sequences are not allowed; not even |
261 | * \' - if you want a ' in the quoted text, you have to do something |
262 | * like 'foo'\''bar'. Double quotes allow $, `, ", \, and newline to |
263 | * be escaped with backslash. Otherwise double quotes preserve things |
264 | * literally. |
265 | * |
266 | * Returns: (type filename): an unquoted string |
267 | **/ |
268 | gchar* |
269 | g_shell_unquote (const gchar *quoted_string, |
270 | GError **error) |
271 | { |
272 | gchar *unquoted; |
273 | gchar *end; |
274 | gchar *start; |
275 | GString *retval; |
276 | |
277 | g_return_val_if_fail (quoted_string != NULL, NULL); |
278 | |
279 | unquoted = g_strdup (str: quoted_string); |
280 | |
281 | start = unquoted; |
282 | end = unquoted; |
283 | retval = g_string_new (NULL); |
284 | |
285 | /* The loop allows cases such as |
286 | * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo' |
287 | */ |
288 | while (*start) |
289 | { |
290 | /* Append all non-quoted chars, honoring backslash escape |
291 | */ |
292 | |
293 | while (*start && !(*start == '"' || *start == '\'')) |
294 | { |
295 | if (*start == '\\') |
296 | { |
297 | /* all characters can get escaped by backslash, |
298 | * except newline, which is removed if it follows |
299 | * a backslash outside of quotes |
300 | */ |
301 | |
302 | ++start; |
303 | if (*start) |
304 | { |
305 | if (*start != '\n') |
306 | g_string_append_c (retval, *start); |
307 | ++start; |
308 | } |
309 | } |
310 | else |
311 | { |
312 | g_string_append_c (retval, *start); |
313 | ++start; |
314 | } |
315 | } |
316 | |
317 | if (*start) |
318 | { |
319 | if (!unquote_string_inplace (str: start, end: &end, err: error)) |
320 | { |
321 | goto error; |
322 | } |
323 | else |
324 | { |
325 | g_string_append (string: retval, val: start); |
326 | start = end; |
327 | } |
328 | } |
329 | } |
330 | |
331 | g_free (mem: unquoted); |
332 | return g_string_free (string: retval, FALSE); |
333 | |
334 | error: |
335 | g_assert (error == NULL || *error != NULL); |
336 | |
337 | g_free (mem: unquoted); |
338 | g_string_free (string: retval, TRUE); |
339 | return NULL; |
340 | } |
341 | |
342 | /* g_shell_parse_argv() does a semi-arbitrary weird subset of the way |
343 | * the shell parses a command line. We don't do variable expansion, |
344 | * don't understand that operators are tokens, don't do tilde expansion, |
345 | * don't do command substitution, no arithmetic expansion, IFS gets ignored, |
346 | * don't do filename globs, don't remove redirection stuff, etc. |
347 | * |
348 | * READ THE UNIX98 SPEC on "Shell Command Language" before changing |
349 | * the behavior of this code. |
350 | * |
351 | * Steps to parsing the argv string: |
352 | * |
353 | * - tokenize the string (but since we ignore operators, |
354 | * our tokenization may diverge from what the shell would do) |
355 | * note that tokenization ignores the internals of a quoted |
356 | * word and it always splits on spaces, not on IFS even |
357 | * if we used IFS. We also ignore "end of input indicator" |
358 | * (I guess this is control-D?) |
359 | * |
360 | * Tokenization steps, from UNIX98 with operator stuff removed, |
361 | * are: |
362 | * |
363 | * 1) "If the current character is backslash, single-quote or |
364 | * double-quote (\, ' or ") and it is not quoted, it will affect |
365 | * quoting for subsequent characters up to the end of the quoted |
366 | * text. The rules for quoting are as described in Quoting. |
367 | * During token recognition no substitutions will be actually |
368 | * performed, and the result token will contain exactly the |
369 | * characters that appear in the input (except for newline |
370 | * character joining), unmodified, including any embedded or |
371 | * enclosing quotes or substitution operators, between the quote |
372 | * mark and the end of the quoted text. The token will not be |
373 | * delimited by the end of the quoted field." |
374 | * |
375 | * 2) "If the current character is an unquoted newline character, |
376 | * the current token will be delimited." |
377 | * |
378 | * 3) "If the current character is an unquoted blank character, any |
379 | * token containing the previous character is delimited and the |
380 | * current character will be discarded." |
381 | * |
382 | * 4) "If the previous character was part of a word, the current |
383 | * character will be appended to that word." |
384 | * |
385 | * 5) "If the current character is a "#", it and all subsequent |
386 | * characters up to, but excluding, the next newline character |
387 | * will be discarded as a comment. The newline character that |
388 | * ends the line is not considered part of the comment. The |
389 | * "#" starts a comment only when it is at the beginning of a |
390 | * token. Since the search for the end-of-comment does not |
391 | * consider an escaped newline character specially, a comment |
392 | * cannot be continued to the next line." |
393 | * |
394 | * 6) "The current character will be used as the start of a new word." |
395 | * |
396 | * |
397 | * - for each token (word), perform portions of word expansion, namely |
398 | * field splitting (using default whitespace IFS) and quote |
399 | * removal. Field splitting may increase the number of words. |
400 | * Quote removal does not increase the number of words. |
401 | * |
402 | * "If the complete expansion appropriate for a word results in an |
403 | * empty field, that empty field will be deleted from the list of |
404 | * fields that form the completely expanded command, unless the |
405 | * original word contained single-quote or double-quote characters." |
406 | * - UNIX98 spec |
407 | * |
408 | * |
409 | */ |
410 | |
411 | static inline void |
412 | ensure_token (GString **token) |
413 | { |
414 | if (*token == NULL) |
415 | *token = g_string_new (NULL); |
416 | } |
417 | |
418 | static void |
419 | delimit_token (GString **token, |
420 | GSList **retval) |
421 | { |
422 | if (*token == NULL) |
423 | return; |
424 | |
425 | *retval = g_slist_prepend (list: *retval, data: g_string_free (string: *token, FALSE)); |
426 | |
427 | *token = NULL; |
428 | } |
429 | |
430 | static GSList* |
431 | tokenize_command_line (const gchar *command_line, |
432 | GError **error) |
433 | { |
434 | gchar current_quote; |
435 | const gchar *p; |
436 | GString *current_token = NULL; |
437 | GSList *retval = NULL; |
438 | gboolean quoted; |
439 | |
440 | current_quote = '\0'; |
441 | quoted = FALSE; |
442 | p = command_line; |
443 | |
444 | while (*p) |
445 | { |
446 | if (current_quote == '\\') |
447 | { |
448 | if (*p == '\n') |
449 | { |
450 | /* we append nothing; backslash-newline become nothing */ |
451 | } |
452 | else |
453 | { |
454 | /* we append the backslash and the current char, |
455 | * to be interpreted later after tokenization |
456 | */ |
457 | ensure_token (token: ¤t_token); |
458 | g_string_append_c (current_token, '\\'); |
459 | g_string_append_c (current_token, *p); |
460 | } |
461 | |
462 | current_quote = '\0'; |
463 | } |
464 | else if (current_quote == '#') |
465 | { |
466 | /* Discard up to and including next newline */ |
467 | while (*p && *p != '\n') |
468 | ++p; |
469 | |
470 | current_quote = '\0'; |
471 | |
472 | if (*p == '\0') |
473 | break; |
474 | } |
475 | else if (current_quote) |
476 | { |
477 | if (*p == current_quote && |
478 | /* check that it isn't an escaped double quote */ |
479 | !(current_quote == '"' && quoted)) |
480 | { |
481 | /* close the quote */ |
482 | current_quote = '\0'; |
483 | } |
484 | |
485 | /* Everything inside quotes, and the close quote, |
486 | * gets appended literally. |
487 | */ |
488 | |
489 | ensure_token (token: ¤t_token); |
490 | g_string_append_c (current_token, *p); |
491 | } |
492 | else |
493 | { |
494 | switch (*p) |
495 | { |
496 | case '\n': |
497 | delimit_token (token: ¤t_token, retval: &retval); |
498 | break; |
499 | |
500 | case ' ': |
501 | case '\t': |
502 | /* If the current token contains the previous char, delimit |
503 | * the current token. A nonzero length |
504 | * token should always contain the previous char. |
505 | */ |
506 | if (current_token && |
507 | current_token->len > 0) |
508 | { |
509 | delimit_token (token: ¤t_token, retval: &retval); |
510 | } |
511 | |
512 | /* discard all unquoted blanks (don't add them to a token) */ |
513 | break; |
514 | |
515 | |
516 | /* single/double quotes are appended to the token, |
517 | * escapes are maybe appended next time through the loop, |
518 | * comment chars are never appended. |
519 | */ |
520 | |
521 | case '\'': |
522 | case '"': |
523 | ensure_token (token: ¤t_token); |
524 | g_string_append_c (current_token, *p); |
525 | |
526 | G_GNUC_FALLTHROUGH; |
527 | case '\\': |
528 | current_quote = *p; |
529 | break; |
530 | |
531 | case '#': |
532 | if (p == command_line) |
533 | { /* '#' was the first char */ |
534 | current_quote = *p; |
535 | break; |
536 | } |
537 | switch(*(p-1)) |
538 | { |
539 | case ' ': |
540 | case '\n': |
541 | case '\0': |
542 | current_quote = *p; |
543 | break; |
544 | default: |
545 | ensure_token (token: ¤t_token); |
546 | g_string_append_c (current_token, *p); |
547 | break; |
548 | } |
549 | break; |
550 | |
551 | default: |
552 | /* Combines rules 4) and 6) - if we have a token, append to it, |
553 | * otherwise create a new token. |
554 | */ |
555 | ensure_token (token: ¤t_token); |
556 | g_string_append_c (current_token, *p); |
557 | break; |
558 | } |
559 | } |
560 | |
561 | /* We need to count consecutive backslashes mod 2, |
562 | * to detect escaped doublequotes. |
563 | */ |
564 | if (*p != '\\') |
565 | quoted = FALSE; |
566 | else |
567 | quoted = !quoted; |
568 | |
569 | ++p; |
570 | } |
571 | |
572 | delimit_token (token: ¤t_token, retval: &retval); |
573 | |
574 | if (current_quote) |
575 | { |
576 | if (current_quote == '\\') |
577 | g_set_error (err: error, |
578 | G_SHELL_ERROR, |
579 | code: G_SHELL_ERROR_BAD_QUOTING, |
580 | _("Text ended just after a “\\” character." |
581 | " (The text was “%s”)" ), |
582 | command_line); |
583 | else |
584 | g_set_error (err: error, |
585 | G_SHELL_ERROR, |
586 | code: G_SHELL_ERROR_BAD_QUOTING, |
587 | _("Text ended before matching quote was found for %c." |
588 | " (The text was “%s”)" ), |
589 | current_quote, command_line); |
590 | |
591 | goto error; |
592 | } |
593 | |
594 | if (retval == NULL) |
595 | { |
596 | g_set_error_literal (err: error, |
597 | G_SHELL_ERROR, |
598 | code: G_SHELL_ERROR_EMPTY_STRING, |
599 | _("Text was empty (or contained only whitespace)" )); |
600 | |
601 | goto error; |
602 | } |
603 | |
604 | /* we appended backward */ |
605 | retval = g_slist_reverse (list: retval); |
606 | |
607 | return retval; |
608 | |
609 | error: |
610 | g_assert (error == NULL || *error != NULL); |
611 | |
612 | g_slist_free_full (list: retval, free_func: g_free); |
613 | |
614 | return NULL; |
615 | } |
616 | |
617 | /** |
618 | * g_shell_parse_argv: |
619 | * @command_line: (type filename): command line to parse |
620 | * @argcp: (out) (optional): return location for number of args |
621 | * @argvp: (out) (optional) (array length=argcp zero-terminated=1) (element-type filename): |
622 | * return location for array of args |
623 | * @error: (optional): return location for error |
624 | * |
625 | * Parses a command line into an argument vector, in much the same way |
626 | * the shell would, but without many of the expansions the shell would |
627 | * perform (variable expansion, globs, operators, filename expansion, |
628 | * etc. are not supported). The results are defined to be the same as |
629 | * those you would get from a UNIX98 /bin/sh, as long as the input |
630 | * contains none of the unsupported shell expansions. If the input |
631 | * does contain such expansions, they are passed through |
632 | * literally. Possible errors are those from the #G_SHELL_ERROR |
633 | * domain. Free the returned vector with g_strfreev(). |
634 | * |
635 | * Returns: %TRUE on success, %FALSE if error set |
636 | **/ |
637 | gboolean |
638 | g_shell_parse_argv (const gchar *command_line, |
639 | gint *argcp, |
640 | gchar ***argvp, |
641 | GError **error) |
642 | { |
643 | /* Code based on poptParseArgvString() from libpopt */ |
644 | gint argc = 0; |
645 | gchar **argv = NULL; |
646 | GSList *tokens = NULL; |
647 | gint i; |
648 | GSList *tmp_list; |
649 | |
650 | g_return_val_if_fail (command_line != NULL, FALSE); |
651 | |
652 | tokens = tokenize_command_line (command_line, error); |
653 | if (tokens == NULL) |
654 | return FALSE; |
655 | |
656 | /* Because we can't have introduced any new blank space into the |
657 | * tokens (we didn't do any new expansions), we don't need to |
658 | * perform field splitting. If we were going to honor IFS or do any |
659 | * expansions, we would have to do field splitting on each word |
660 | * here. Also, if we were going to do any expansion we would need to |
661 | * remove any zero-length words that didn't contain quotes |
662 | * originally; but since there's no expansion we know all words have |
663 | * nonzero length, unless they contain quotes. |
664 | * |
665 | * So, we simply remove quotes, and don't do any field splitting or |
666 | * empty word removal, since we know there was no way to introduce |
667 | * such things. |
668 | */ |
669 | |
670 | argc = g_slist_length (list: tokens); |
671 | argv = g_new0 (gchar*, argc + 1); |
672 | i = 0; |
673 | tmp_list = tokens; |
674 | while (tmp_list) |
675 | { |
676 | argv[i] = g_shell_unquote (quoted_string: tmp_list->data, error); |
677 | |
678 | /* Since we already checked that quotes matched up in the |
679 | * tokenizer, this shouldn't be possible to reach I guess. |
680 | */ |
681 | if (argv[i] == NULL) |
682 | goto failed; |
683 | |
684 | tmp_list = g_slist_next (tmp_list); |
685 | ++i; |
686 | } |
687 | |
688 | g_slist_free_full (list: tokens, free_func: g_free); |
689 | |
690 | if (argcp) |
691 | *argcp = argc; |
692 | |
693 | if (argvp) |
694 | *argvp = argv; |
695 | else |
696 | g_strfreev (str_array: argv); |
697 | |
698 | return TRUE; |
699 | |
700 | failed: |
701 | |
702 | g_assert (error == NULL || *error != NULL); |
703 | g_strfreev (str_array: argv); |
704 | g_slist_free_full (list: tokens, free_func: g_free); |
705 | |
706 | return FALSE; |
707 | } |
708 | |