gshell.c source code [gtk/subprojects/glib/glib/gshell.c]

1	/* gshell.c - Shell-related utilities
2	*
3	* Copyright 2000 Red Hat, Inc.
4	* g_execvpe implementation based on GNU libc execvp:
5	* Copyright 1991, 92, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Lesser General Public
9	* License as published by the Free Software Foundation; either
10	* version 2.1 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Lesser General Public License for more details.
16	*
17	* You should have received a copy of the GNU Lesser General Public License
18	* along with this library; if not, see <http://www.gnu.org/licenses/>.
19	*/
20
21	#include "config.h"
22
23	#include <string.h>
24
25	#include "gshell.h"
26
27	#include "gslist.h"
28	#include "gstrfuncs.h"
29	#include "gstring.h"
30	#include "gtestutils.h"
31	#include "glibintl.h"
32	#include "gthread.h"
33
34	/**
35	* SECTION:shell
36	* @title: Shell-related Utilities
37	* @short_description: shell-like commandline handling
38	*
39	* GLib provides the functions g_shell_quote() and g_shell_unquote()
40	* to handle shell-like quoting in strings. The function g_shell_parse_argv()
41	* parses a string similar to the way a POSIX shell (/bin/sh) would.
42	*
43	* Note that string handling in shells has many obscure and historical
44	* corner-cases which these functions do not necessarily reproduce. They
45	* are good enough in practice, though.
46	*/
47
48	/**
49	* G_SHELL_ERROR:
50	*
51	* Error domain for shell functions. Errors in this domain will be from
52	* the #GShellError enumeration. See #GError for information on error
53	* domains.
54	**/
55
56	/**
57	* GShellError:
58	* @G_SHELL_ERROR_BAD_QUOTING: Mismatched or otherwise mangled quoting.
59	* @G_SHELL_ERROR_EMPTY_STRING: String to be parsed was empty.
60	* @G_SHELL_ERROR_FAILED: Some other error.
61	*
62	* Error codes returned by shell functions.
63	**/
/* Invokes G_DEFINE_QUARK with the quark string g-shell-error-quark and the
 * name prefix g_shell_error.
 * NOTE(review): the macro is defined outside this file; it presumably emits
 * the g_shell_error_quark() accessor that backs G_SHELL_ERROR - confirm
 * against the GQuark header.
 */
64	G_DEFINE_QUARK (g-shell-error-quark, g_shell_error)
65
66	/ Single quotes preserve the literal string exactly. escape*
67	* sequences are not allowed; not even \' - if you want a '
68	* in the quoted text, you have to do something like 'foo'\''bar'
69	*
70	* Double quotes allow $ ` " \ and newline to be escaped with backslash.
71	* Otherwise double quotes preserve things literally.
72	*/
73
74	static gboolean
75	unquote_string_inplace (gchar* str, gchar end, GError err)
76	{
77	gchar* dest;
78	gchar* s;
79	gchar quote_char;
80
81	g_return_val_if_fail(end != NULL, FALSE);
82	g_return_val_if_fail(err == NULL \|\| *err == NULL, FALSE);
83	g_return_val_if_fail(str != NULL, FALSE);
84
85	dest = s = str;
86
87	quote_char = *s;
88
89	if (!(s == `'"'` \|\| s == `'\''`))
90	{
91	g_set_error_literal (err,
92	G_SHELL_ERROR,
93	code: G_SHELL_ERROR_BAD_QUOTING,
94	_("Quoted text doesn’t begin with a quotation mark"));
95	*end = str;
96	return FALSE;
97	}
98
99	/ Skip the initial quote mark /
100	++s;
101
102	if (quote_char == `'"'`)
103	{
104	while (*s)
105	{
106	g_assert(s > dest); / loop invariant /
107
108	switch (*s)
109	{
110	case `'"'`:
111	/ End of the string, return now /
112	*dest = `'\0'`;
113	++s;
114	*end = s;
115	return TRUE;
116	break;
117
118	case `'\\'`:
119	/ Possible escaped quote or \ /
120	++s;
121	switch (*s)
122	{
123	case `'"'`:
124	case `'\\'`:
125	case '`':
126	case `'$'`:
127	case `'\n'`:
128	dest = s;
129	++s;
130	++dest;
131	break;
132
133	default:
134	/ not an escaped char /
135	*dest = `'\\'`;
136	++dest;
137	/ ++s already done. /
138	break;
139	}
140	break;
141
142	default:
143	dest = s;
144	++dest;
145	++s;
146	break;
147	}
148
149	g_assert(s > dest); / loop invariant /
150	}
151	}
152	else
153	{
154	while (*s)
155	{
156	g_assert(s > dest); / loop invariant /
157
158	if (*s == `'\''`)
159	{
160	/ End of the string, return now /
161	*dest = `'\0'`;
162	++s;
163	*end = s;
164	return TRUE;
165	}
166	else
167	{
168	dest = s;
169	++dest;
170	++s;
171	}
172
173	g_assert(s > dest); / loop invariant /
174	}
175	}
176
177	/ If we reach here this means the close quote was never encountered /
178
179	*dest = `'\0'`;
180
181	g_set_error_literal (err,
182	G_SHELL_ERROR,
183	code: G_SHELL_ERROR_BAD_QUOTING,
184	_("Unmatched quotation mark in command line or other shell-quoted text"));
185	*end = s;
186	return FALSE;
187	}
188
189	/**
190	* g_shell_quote:
191	* @unquoted_string: (type filename): a literal string
192	*
193	* Quotes a string so that the shell (/bin/sh) will interpret the
194	* quoted string to mean @unquoted_string. If you pass a filename to
195	* the shell, for example, you should first quote it with this
196	* function. The return value must be freed with g_free(). The
197	* quoting style used is undefined (single or double quotes may be
198	* used).
199	*
200	* Returns: (type filename): quoted string
201	**/
202	gchar*
203	g_shell_quote (const gchar *unquoted_string)
204	{
205	/ We always use single quotes, because the algorithm is cheesier.*
206	* We could use double if we felt like it, that might be more
207	* human-readable.
208	*/
209
210	const gchar *p;
211	GString *dest;
212
213	g_return_val_if_fail (unquoted_string != NULL, NULL);
214
215	dest = g_string_new (init: "'");
216
217	p = unquoted_string;
218
219	/ could speed this up a lot by appending chunks of text at a*
220	* time.
221	*/
222	while (*p)
223	{
224	/ Replace literal ' with a close ', a \', and an open ' /
225	if (*p == `'\''`)
226	g_string_append (string: dest, val: "'\\''");
227	else
228	g_string_append_c (dest, *p);
229
230	++p;
231	}
232
233	/ close the quote /
234	g_string_append_c (dest, `'\''`);
235
236	return g_string_free (string: dest, FALSE);
237	}
238
239	/**
240	* g_shell_unquote:
241	* @quoted_string: (type filename): shell-quoted string
242	* @error: error return location or NULL
243	*
244	* Unquotes a string as the shell (/bin/sh) would. Only handles
245	* quotes; if a string contains file globs, arithmetic operators,
246	* variables, backticks, redirections, or other special-to-the-shell
247	* features, the result will be different from the result a real shell
248	* would produce (the variables, backticks, etc. will be passed
249	* through literally instead of being expanded). This function is
250	* guaranteed to succeed if applied to the result of
251	* g_shell_quote(). If it fails, it returns %NULL and sets the
252	* error. The @quoted_string need not actually contain quoted or
253	* escaped text; g_shell_unquote() simply goes through the string and
254	* unquotes/unescapes anything that the shell would. Both single and
255	* double quotes are handled, as are escapes including escaped
256	* newlines. The return value must be freed with g_free(). Possible
257	* errors are in the #G_SHELL_ERROR domain.
258	*
259	* Shell quoting rules are a bit strange. Single quotes preserve the
260	* literal string exactly. escape sequences are not allowed; not even
261	* \' - if you want a ' in the quoted text, you have to do something
262	* like 'foo'\''bar'. Double quotes allow $, `, ", \, and newline to
263	* be escaped with backslash. Otherwise double quotes preserve things
264	* literally.
265	*
266	* Returns: (type filename): an unquoted string
267	**/
268	gchar*
269	g_shell_unquote (const gchar *quoted_string,
270	GError **error)
271	{
272	gchar *unquoted;
273	gchar *end;
274	gchar *start;
275	GString *retval;
276
277	g_return_val_if_fail (quoted_string != NULL, NULL);
278
279	unquoted = g_strdup (str: quoted_string);
280
281	start = unquoted;
282	end = unquoted;
283	retval = g_string_new (NULL);
284
285	/ The loop allows cases such as*
286	* "foo"blah blah'bar'woo foo"baz"la la la\'\''foo'
287	*/
288	while (*start)
289	{
290	/ Append all non-quoted chars, honoring backslash escape*
291	*/
292
293	while (start && !(start == `'"'` \|\| *start == `'\''`))
294	{
295	if (*start == `'\\'`)
296	{
297	/ all characters can get escaped by backslash,*
298	* except newline, which is removed if it follows
299	* a backslash outside of quotes
300	*/
301
302	++start;
303	if (*start)
304	{
305	if (*start != `'\n'`)
306	g_string_append_c (retval, *start);
307	++start;
308	}
309	}
310	else
311	{
312	g_string_append_c (retval, *start);
313	++start;
314	}
315	}
316
317	if (*start)
318	{
319	if (!unquote_string_inplace (str: start, end: &end, err: error))
320	{
321	goto error;
322	}
323	else
324	{
325	g_string_append (string: retval, val: start);
326	start = end;
327	}
328	}
329	}
330
331	g_free (mem: unquoted);
332	return g_string_free (string: retval, FALSE);
333
334	error:
335	g_assert (error == NULL \|\| *error != NULL);
336
337	g_free (mem: unquoted);
338	g_string_free (string: retval, TRUE);
339	return NULL;
340	}
341
342	/* g_shell_parse_argv() does a semi-arbitrary weird subset of the way
343	* the shell parses a command line. We don't do variable expansion,
344	* don't understand that operators are tokens, don't do tilde expansion,
345	* don't do command substitution, no arithmetic expansion, IFS gets ignored,
346	* don't do filename globs, don't remove redirection stuff, etc.
347	*
348	* READ THE UNIX98 SPEC on "Shell Command Language" before changing
349	* the behavior of this code.
350	*
351	* Steps to parsing the argv string:
352	*
353	* - tokenize the string (but since we ignore operators,
354	* our tokenization may diverge from what the shell would do)
355	* note that tokenization ignores the internals of a quoted
356	* word and it always splits on spaces, not on IFS even
357	* if we used IFS. We also ignore "end of input indicator"
358	* (I guess this is control-D?)
359	*
360	* Tokenization steps, from UNIX98 with operator stuff removed,
361	* are:
362	*
363	* 1) "If the current character is backslash, single-quote or
364	* double-quote (\, ' or ") and it is not quoted, it will affect
365	* quoting for subsequent characters up to the end of the quoted
366	* text. The rules for quoting are as described in Quoting
367	* . During token recognition no substitutions will be actually
368	* performed, and the result token will contain exactly the
369	* characters that appear in the input (except for newline
370	* character joining), unmodified, including any embedded or
371	* enclosing quotes or substitution operators, between the quote
372	* mark and the end of the quoted text. The token will not be
373	* delimited by the end of the quoted field."
374	*
375	* 2) "If the current character is an unquoted newline character,
376	* the current token will be delimited."
377	*
378	* 3) "If the current character is an unquoted blank character, any
379	* token containing the previous character is delimited and the
380	* current character will be discarded."
381	*
382	* 4) "If the previous character was part of a word, the current
383	* character will be appended to that word."
384	*
385	* 5) "If the current character is a "#", it and all subsequent
386	* characters up to, but excluding, the next newline character
387	* will be discarded as a comment. The newline character that
388	* ends the line is not considered part of the comment. The
389	* "#" starts a comment only when it is at the beginning of a
390	* token. Since the search for the end-of-comment does not
391	* consider an escaped newline character specially, a comment
392	* cannot be continued to the next line."
393	*
394	* 6) "The current character will be used as the start of a new word."
395	*
396	*
397	* - for each token (word), perform portions of word expansion, namely
398	* field splitting (using default whitespace IFS) and quote
399	* removal. Field splitting may increase the number of words.
400	* Quote removal does not increase the number of words.
401	*
402	* "If the complete expansion appropriate for a word results in an
403	* empty field, that empty field will be deleted from the list of
404	* fields that form the completely expanded command, unless the
405	* original word contained single-quote or double-quote characters."
406	* - UNIX98 spec
407	*
408	*
409	*/
410
411	static inline void
412	ensure_token (GString **token)
413	{
414	if (*token == NULL)
415	*token = g_string_new (NULL);
416	}
417
418	static void
419	delimit_token (GString **token,
420	GSList **retval)
421	{
422	if (*token == NULL)
423	return;
424
425	retval = g_slist_prepend (list: retval, data: g_string_free (string: *token, FALSE));
426
427	*token = NULL;
428	}
429
430	static GSList*
431	tokenize_command_line (const gchar *command_line,
432	GError **error)
433	{
434	gchar current_quote;
435	const gchar *p;
436	GString *current_token = NULL;
437	GSList *retval = NULL;
438	gboolean quoted;
439
440	current_quote = `'\0'`;
441	quoted = FALSE;
442	p = command_line;
443
444	while (*p)
445	{
446	if (current_quote == `'\\'`)
447	{
448	if (*p == `'\n'`)
449	{
450	/ we append nothing; backslash-newline become nothing /
451	}
452	else
453	{
454	/ we append the backslash and the current char,*
455	* to be interpreted later after tokenization
456	*/
457	ensure_token (token: &current_token);
458	g_string_append_c (current_token, `'\\'`);
459	g_string_append_c (current_token, *p);
460	}
461
462	current_quote = `'\0'`;
463	}
464	else if (current_quote == `'#'`)
465	{
466	/ Discard up to and including next newline /
467	while (p && p != `'\n'`)
468	++p;
469
470	current_quote = `'\0'`;
471
472	if (*p == `'\0'`)
473	break;
474	}
475	else if (current_quote)
476	{
477	if (*p == current_quote &&
478	/ check that it isn't an escaped double quote /
479	!(current_quote == `'"'` && quoted))
480	{
481	/ close the quote /
482	current_quote = `'\0'`;
483	}
484
485	/ Everything inside quotes, and the close quote,*
486	* gets appended literally.
487	*/
488
489	ensure_token (token: &current_token);
490	g_string_append_c (current_token, *p);
491	}
492	else
493	{
494	switch (*p)
495	{
496	case `'\n'`:
497	delimit_token (token: &current_token, retval: &retval);
498	break;
499
500	case `' '`:
501	case `'\t'`:
502	/ If the current token contains the previous char, delimit*
503	* the current token. A nonzero length
504	* token should always contain the previous char.
505	*/
506	if (current_token &&
507	current_token->len > `0`)
508	{
509	delimit_token (token: &current_token, retval: &retval);
510	}
511
512	/ discard all unquoted blanks (don't add them to a token) /
513	break;
514
515
516	/ single/double quotes are appended to the token,*
517	* escapes are maybe appended next time through the loop,
518	* comment chars are never appended.
519	*/
520
521	case `'\''`:
522	case `'"'`:
523	ensure_token (token: &current_token);
524	g_string_append_c (current_token, *p);
525
526	G_GNUC_FALLTHROUGH;
527	case `'\\'`:
528	current_quote = *p;
529	break;
530
531	case `'#'`:
532	if (p == command_line)
533	{ / '#' was the first char /
534	current_quote = *p;
535	break;
536	}
537	switch(*(p-`1`))
538	{
539	case `' '`:
540	case `'\n'`:
541	case `'\0'`:
542	current_quote = *p;
543	break;
544	default:
545	ensure_token (token: &current_token);
546	g_string_append_c (current_token, *p);
547	break;
548	}
549	break;
550
551	default:
552	/ Combines rules 4) and 6) - if we have a token, append to it,*
553	* otherwise create a new token.
554	*/
555	ensure_token (token: &current_token);
556	g_string_append_c (current_token, *p);
557	break;
558	}
559	}
560
561	/ We need to count consecutive backslashes mod 2,*
562	* to detect escaped doublequotes.
563	*/
564	if (*p != `'\\'`)
565	quoted = FALSE;
566	else
567	quoted = !quoted;
568
569	++p;
570	}
571
572	delimit_token (token: &current_token, retval: &retval);
573
574	if (current_quote)
575	{
576	if (current_quote == `'\\'`)
577	g_set_error (err: error,
578	G_SHELL_ERROR,
579	code: G_SHELL_ERROR_BAD_QUOTING,
580	_("Text ended just after a “\\” character."
581	" (The text was “%s”)"),
582	command_line);
583	else
584	g_set_error (err: error,
585	G_SHELL_ERROR,
586	code: G_SHELL_ERROR_BAD_QUOTING,
587	_("Text ended before matching quote was found for %c."
588	" (The text was “%s”)"),
589	current_quote, command_line);
590
591	goto error;
592	}
593
594	if (retval == NULL)
595	{
596	g_set_error_literal (err: error,
597	G_SHELL_ERROR,
598	code: G_SHELL_ERROR_EMPTY_STRING,
599	_("Text was empty (or contained only whitespace)"));
600
601	goto error;
602	}
603
604	/ we appended backward /
605	retval = g_slist_reverse (list: retval);
606
607	return retval;
608
609	error:
610	g_assert (error == NULL \|\| *error != NULL);
611
612	g_slist_free_full (list: retval, free_func: g_free);
613
614	return NULL;
615	}
616
617	/**
618	* g_shell_parse_argv:
619	* @command_line: (type filename): command line to parse
620	* @argcp: (out) (optional): return location for number of args
621	* @argvp: (out) (optional) (array length=argcp zero-terminated=1) (element-type filename):
622	* return location for array of args
623	* @error: (optional): return location for error
624	*
625	* Parses a command line into an argument vector, in much the same way
626	* the shell would, but without many of the expansions the shell would
627	* perform (variable expansion, globs, operators, filename expansion,
628	* etc. are not supported). The results are defined to be the same as
629	* those you would get from a UNIX98 /bin/sh, as long as the input
630	* contains none of the unsupported shell expansions. If the input
631	* does contain such expansions, they are passed through
632	* literally. Possible errors are those from the #G_SHELL_ERROR
633	* domain. Free the returned vector with g_strfreev().
634	*
635	* Returns: %TRUE on success, %FALSE if error set
636	**/
637	gboolean
638	g_shell_parse_argv (const gchar *command_line,
639	gint *argcp,
640	gchar ***argvp,
641	GError **error)
642	{
643	/ Code based on poptParseArgvString() from libpopt /
644	gint argc = `0`;
645	gchar **argv = NULL;
646	GSList *tokens = NULL;
647	gint i;
648	GSList *tmp_list;
649
650	g_return_val_if_fail (command_line != NULL, FALSE);
651
652	tokens = tokenize_command_line (command_line, error);
653	if (tokens == NULL)
654	return FALSE;
655
656	/ Because we can't have introduced any new blank space into the*
657	* tokens (we didn't do any new expansions), we don't need to
658	* perform field splitting. If we were going to honor IFS or do any
659	* expansions, we would have to do field splitting on each word
660	* here. Also, if we were going to do any expansion we would need to
661	* remove any zero-length words that didn't contain quotes
662	* originally; but since there's no expansion we know all words have
663	* nonzero length, unless they contain quotes.
664	*
665	* So, we simply remove quotes, and don't do any field splitting or
666	* empty word removal, since we know there was no way to introduce
667	* such things.
668	*/
669
670	argc = g_slist_length (list: tokens);
671	argv = g_new0 (gchar*, argc + `1`);
672	i = `0`;
673	tmp_list = tokens;
674	while (tmp_list)
675	{
676	argv[i] = g_shell_unquote (quoted_string: tmp_list->data, error);
677
678	/ Since we already checked that quotes matched up in the*
679	* tokenizer, this shouldn't be possible to reach I guess.
680	*/
681	if (argv[i] == NULL)
682	goto failed;
683
684	tmp_list = g_slist_next (tmp_list);
685	++i;
686	}
687
688	g_slist_free_full (list: tokens, free_func: g_free);
689
690	if (argcp)
691	*argcp = argc;
692
693	if (argvp)
694	*argvp = argv;
695	else
696	g_strfreev (str_array: argv);
697
698	return TRUE;
699
700	failed:
701
702	g_assert (error == NULL \|\| *error != NULL);
703	g_strfreev (str_array: argv);
704	g_slist_free_full (list: tokens, free_func: g_free);
705
706	return FALSE;
707	}
708

source code of gtk/subprojects/glib/glib/gshell.c