Args.cpp source code [lldb/source/Utility/Args.cpp]

1	//===-- Args.cpp ----------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "lldb/Utility/Args.h"
10	#include "lldb/Utility/FileSpec.h"
11	#include "lldb/Utility/Stream.h"
12	#include "lldb/Utility/StringList.h"
13	#include "llvm/ADT/StringSwitch.h"
14
15	using namespace lldb;
16	using namespace lldb_private;
17
18	// A helper function for argument parsing.
19	// Parses the initial part of the first argument using normal double quote
20	// rules: backslash escapes the double quote and itself. The parsed string is
21	// appended to the second argument. The function returns the unparsed portion
22	// of the string, starting at the closing quote.
23	static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted,
24	std::string &result) {
25	// Inside double quotes, '\' and '"' are special.
26	static const char *k_escapable_characters = "\"\\";
27	while (true) {
28	// Skip over regular characters and append them.
29	size_t regular = quoted.find_first_of(Chars: k_escapable_characters);
30	result += quoted.substr(Start: `0`, N: regular);
31	quoted = quoted.substr(Start: regular);
32
33	// If we have reached the end of string or the closing quote, we're done.
34	if (quoted.empty() \|\| quoted.front() == `'"'`)
35	break;
36
37	// We have found a backslash.
38	quoted = quoted.drop_front();
39
40	if (quoted.empty()) {
41	// A lone backslash at the end of string, let's just append it.
42	result += `'\\'`;
43	break;
44	}
45
46	// If the character after the backslash is not an allowed escapable
47	// character, we leave the character sequence untouched.
48	if (strchr(s: k_escapable_characters, c: quoted.front()) == nullptr)
49	result += `'\\'`;
50
51	result += quoted.front();
52	quoted = quoted.drop_front();
53	}
54
55	return quoted;
56	}
57
// Counts the entries of a null-terminated argv-style array.
// A null `argv` is treated as an empty array and yields 0.
static size_t ArgvToArgc(const char **argv) {
  size_t argc = 0;
  if (argv != nullptr) {
    for (const char **cursor = argv; *cursor != nullptr; ++cursor)
      ++argc;
  }
  return argc;
}
66
67	// Trims all whitespace that can separate command line arguments from the left
68	// side of the string.
69	static llvm::StringRef ltrimForArgs(llvm::StringRef str, size_t &shift) {
70	static const char *k_space_separators = " \t";
71	llvm::StringRef result = str.ltrim(Chars: k_space_separators);
72	shift = result.data() - str.data();
73	return result;
74	}
75
76	// A helper function for SetCommandString. Parses a single argument from the
77	// command string, processing quotes and backslashes in a shell-like manner.
78	// The function returns a tuple consisting of the parsed argument, the quote
79	// char used, and the unparsed portion of the string starting at the first
80	// unqouted, unescaped whitespace character.
81	static std::tuple<std::string, char, llvm::StringRef>
82	ParseSingleArgument(llvm::StringRef command) {
83	// Argument can be split into multiple discontiguous pieces, for example:
84	// "Hello ""World"
85	// this would result in a single argument "Hello World" (without the quotes)
86	// since the quotes would be removed and there is not space between the
87	// strings.
88	std::string arg;
89
90	// Since we can have multiple quotes that form a single command in a command
91	// like: "Hello "world'!' (which will make a single argument "Hello world!")
92	// we remember the first quote character we encounter and use that for the
93	// quote character.
94	char first_quote_char = `'\0'`;
95
96	bool arg_complete = false;
97	do {
98	// Skip over regular characters and append them.
99	size_t regular = command.find_first_of(Chars: " \t\r\"'`\\");
100	arg += command.substr(Start: `0`, N: regular);
101	command = command.substr(Start: regular);
102
103	if (command.empty())
104	break;
105
106	char special = command.front();
107	command = command.drop_front();
108	switch (special) {
109	case `'\\'`:
110	if (command.empty()) {
111	arg += `'\\'`;
112	break;
113	}
114
115	// If the character after the backslash is not an allowed escapable
116	// character, we leave the character sequence untouched.
117	if (strchr(s: " \t\\'\"`", c: command.front()) == nullptr)
118	arg += `'\\'`;
119
120	arg += command.front();
121	command = command.drop_front();
122
123	break;
124
125	case `' '`:
126	case `'\t'`:
127	case `'\r'`:
128	// We are not inside any quotes, we just found a space after an argument.
129	// We are done.
130	arg_complete = true;
131	break;
132
133	case `'"'`:
134	case `'\''`:
135	case '`':
136	// We found the start of a quote scope.
137	if (first_quote_char == `'\0'`)
138	first_quote_char = special;
139
140	if (special == `'"'`)
141	command = ParseDoubleQuotes(quoted: command, result&: arg);
142	else {
143	// For single quotes, we simply skip ahead to the matching quote
144	// character (or the end of the string).
145	size_t quoted = command.find(C: special);
146	arg += command.substr(Start: `0`, N: quoted);
147	command = command.substr(Start: quoted);
148	}
149
150	// If we found a closing quote, skip it.
151	if (!command.empty())
152	command = command.drop_front();
153
154	break;
155	}
156	} while (!arg_complete);
157
158	return std::make_tuple(args&: arg, args&: first_quote_char, args&: command);
159	}
160
161	Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote,
162	std::optional<uint16_t> column)
163	: quote(quote), column (column) {
164	size_t size = str.size();
165	ptr.reset(p: new char[size + `1`]);
166
167	::memcpy(dest: data(), src: str.data() ? str.data() : "", n: size);
168	ptr [size] = `0`;
169	}
170
171	// Args constructor
172	Args::Args(llvm::StringRef command) { SetCommandString(command); }
173
174	Args::Args(const Args &rhs) { *this = rhs; }
175
176	Args::Args(const StringList &list) : Args () {
177	for (const std::string &arg : list)
178	AppendArgument(arg_str: arg);
179	}
180
181	Args::Args(llvm::ArrayRef<llvm::StringRef> args) : Args () {
182	for (llvm::StringRef arg : args)
183	AppendArgument(arg_str: arg);
184	}
185
186	Args &Args::operator=(const Args &rhs) {
187	Clear();
188
189	m_argv.clear();
190	m_entries.clear();
191	for (auto &entry : rhs.m_entries) {
192	m_entries.emplace_back(args: entry.ref(), args: entry.quote, args: entry.column);
193	m_argv.push_back(x: m_entries.back().data());
194	}
195	m_argv.push_back(x: nullptr);
196	return *this;
197	}
198
199	// Destructor
200	Args::~Args() = default;
201
202	void Args::Dump(Stream &s, const char label_name) const* {
203	if (!label_name)
204	return;
205
206	int i = `0`;
207	for (auto &entry : m_entries) {
208	s.Indent();
209	s.Format(format: "{0}[{1}]=\"{2}\"\n", args&: label_name, args: i++, args: entry.ref());
210	}
211	s.Format(format: "{0}[{1}]=NULL\n", args&: label_name, args&: i);
212	s.EOL();
213	}
214
215	bool Args::GetCommandString(std::string &command) const {
216	command.clear();
217
218	for (size_t i = `0`; i < m_entries.size(); ++i) {
219	if (i > `0`)
220	command += `' '`;
221	char quote = m_entries [i].quote;
222	if (quote != `'\0'`)
223	command += quote;
224	command += m_entries [i].ref();
225	if (quote != `'\0'`)
226	command += quote;
227	}
228
229	return !m_entries.empty();
230	}
231
232	bool Args::GetQuotedCommandString(std::string &command) const {
233	command.clear();
234
235	for (size_t i = `0`; i < m_entries.size(); ++i) {
236	if (i > `0`)
237	command += `' '`;
238
239	if (m_entries [i].quote) {
240	command += m_entries [i].quote;
241	command += m_entries [i].ref();
242	command += m_entries [i].quote;
243	} else {
244	command += m_entries [i].ref();
245	}
246	}
247
248	return !m_entries.empty();
249	}
250
251	void Args::SetCommandString(llvm::StringRef command) {
252	Clear();
253	m_argv.clear();
254
255	uint16_t column = `1`;
256	size_t shift = `0`;
257	command = ltrimForArgs(str: command, shift);
258	column += shift;
259	std::string arg;
260	char quote;
261	while (!command.empty()) {
262	const char *prev = command.data();
263	std::tie(args&: arg, args&: quote, args&: command) = ParseSingleArgument(command);
264	m_entries.emplace_back(args&: arg, args&: quote, args&: column);
265	m_argv.push_back(x: m_entries.back().data());
266	command = ltrimForArgs(str: command, shift);
267	column += shift;
268	column += command.data() - prev;
269	}
270	m_argv.push_back(x: nullptr);
271	}
272
273	const char Args::GetArgumentAtIndex(size_t idx) const* {
274	if (idx < m_argv.size())
275	return m_argv [idx];
276	return nullptr;
277	}
278
279	char **Args::GetArgumentVector() {
280	assert(!m_argv.empty());
281	// TODO: functions like execve and posix_spawnp exhibit undefined behavior
282	// when argv or envp is null. So the code below is actually wrong. However,
283	// other code in LLDB depends on it being null. The code has been acting
284	// this way for some time, so it makes sense to leave it this way until
285	// someone has the time to come along and fix it.
286	return (m_argv.size() > `1`) ? m_argv.data() : nullptr;
287	}
288
289	const char *Args::GetConstArgumentVector() const* {
290	assert(!m_argv.empty());
291	return (m_argv.size() > `1`) ? const_cast<const char **>(m_argv.data())
292	: nullptr;
293	}
294
295	void Args::Shift() {
296	// Don't pop the last NULL terminator from the argv array
297	if (m_entries.empty())
298	return;
299	m_argv.erase(position: m_argv.begin());
300	m_entries.erase(position: m_entries.begin());
301	}
302
303	void Args::Unshift(llvm::StringRef arg_str, char quote_char) {
304	InsertArgumentAtIndex(idx: `0`, arg_str, quote_char);
305	}
306
307	void Args::AppendArguments(const Args &rhs) {
308	assert(m_argv.size() == m_entries.size() + `1`);
309	assert(m_argv.back() == nullptr);
310	m_argv.pop_back();
311	for (auto &entry : rhs.m_entries) {
312	m_entries.emplace_back(args: entry.ref(), args: entry.quote, args: entry.column);
313	m_argv.push_back(x: m_entries.back().data());
314	}
315	m_argv.push_back(x: nullptr);
316	}
317
318	void Args::AppendArguments(const char **argv) {
319	size_t argc = ArgvToArgc(argv);
320
321	assert(m_argv.size() == m_entries.size() + `1`);
322	assert(m_argv.back() == nullptr);
323	m_argv.pop_back();
324	for (auto arg : llvm::ArrayRef(argv, argc)) {
325	m_entries.emplace_back(args&: arg, args: `'\0'`, args: std::nullopt);
326	m_argv.push_back(x: m_entries.back().data());
327	}
328
329	m_argv.push_back(x: nullptr);
330	}
331
332	void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) {
333	InsertArgumentAtIndex(idx: GetArgumentCount(), arg_str, quote_char);
334	}
335
336	void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
337	char quote_char) {
338	assert(m_argv.size() == m_entries.size() + `1`);
339	assert(m_argv.back() == nullptr);
340
341	if (idx > m_entries.size())
342	return;
343	m_entries.emplace(position: m_entries.begin() + idx, args&: arg_str, args&: quote_char, args: std::nullopt);
344	m_argv.insert(position: m_argv.begin() + idx, x: m_entries [idx].data());
345	}
346
347	void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
348	char quote_char) {
349	assert(m_argv.size() == m_entries.size() + `1`);
350	assert(m_argv.back() == nullptr);
351
352	if (idx >= m_entries.size())
353	return;
354
355	m_entries [idx] = ArgEntry (arg_str, quote_char, std::nullopt);
356	m_argv [idx] = m_entries [idx].data();
357	}
358
359	void Args::DeleteArgumentAtIndex(size_t idx) {
360	if (idx >= m_entries.size())
361	return;
362
363	m_argv.erase(position: m_argv.begin() + idx);
364	m_entries.erase(position: m_entries.begin() + idx);
365	}
366
367	void Args::SetArguments(size_t argc, const char **argv) {
368	Clear();
369
370	auto args = llvm::ArrayRef(argv, argc);
371	m_entries.resize(new_size: argc);
372	m_argv.resize(new_size: argc + `1`);
373	for (size_t i = `0`; i < args.size(); ++i) {
374	char quote =
375	((args [i][`0`] == `'\''`) \|\| (args [i][`0`] == `'"'`) \|\| (args [i][`0`] == '`'))
376	? args [i][`0`]
377	: `'\0'`;
378
379	m_entries [i] = ArgEntry (args [i], quote, std::nullopt);
380	m_argv [i] = m_entries [i].data();
381	}
382	}
383
384	void Args::SetArguments(const char **argv) {
385	SetArguments(argc: ArgvToArgc(argv), argv);
386	}
387
388	void Args::Clear() {
389	m_entries.clear();
390	m_argv.clear();
391	m_argv.push_back(x: nullptr);
392	}
393
394	std::string Args::GetShellSafeArgument(const FileSpec &shell,
395	llvm::StringRef unsafe_arg) {
396	struct ShellDescriptor {
397	llvm::StringRef m_basename;
398	llvm::StringRef m_escapables;
399	};
400
401	static ShellDescriptor g_Shells[] = {{.m_basename: "bash", .m_escapables: " '\"<>()&;"},
402	{.m_basename: "fish", .m_escapables: " '\"<>()&\\\|;"},
403	{.m_basename: "tcsh", .m_escapables: " '\"<>()&;"},
404	{.m_basename: "zsh", .m_escapables: " '\"<>()&;\\\|"},
405	{.m_basename: "sh", .m_escapables: " '\"<>()&;"}};
406
407	// safe minimal set
408	llvm::StringRef escapables = " '\"";
409
410	auto basename = shell.GetFilename().GetStringRef();
411	if (!basename.empty()) {
412	for (const auto &Shell : g_Shells) {
413	if (Shell.m_basename == basename) {
414	escapables = Shell.m_escapables;
415	break;
416	}
417	}
418	}
419
420	std::string safe_arg;
421	safe_arg.reserve(res_arg: unsafe_arg.size());
422	// Add a \ before every character that needs to be escaped.
423	for (char c : unsafe_arg) {
424	if (escapables.contains(C: c))
425	safe_arg.push_back(c: `'\\'`);
426	safe_arg.push_back(c: c);
427	}
428	return safe_arg;
429	}
430
431	lldb::Encoding Args::StringToEncoding(llvm::StringRef s,
432	lldb::Encoding fail_value) {
433	return llvm::StringSwitch<lldb::Encoding>(s)
434	.Case(S: "uint", Value: eEncodingUint)
435	.Case(S: "sint", Value: eEncodingSint)
436	.Case(S: "ieee754", Value: eEncodingIEEE754)
437	.Case(S: "vector", Value: eEncodingVector)
438	.Default(Value: fail_value);
439	}
440
441	uint32_t Args::StringToGenericRegister(llvm::StringRef s) {
442	if (s.empty())
443	return LLDB_INVALID_REGNUM;
444	uint32_t result = llvm::StringSwitch<uint32_t>(s)
445	.Case(S: "pc", LLDB_REGNUM_GENERIC_PC)
446	.Case(S: "sp", LLDB_REGNUM_GENERIC_SP)
447	.Case(S: "fp", LLDB_REGNUM_GENERIC_FP)
448	.Cases(S0: "ra", S1: "lr", LLDB_REGNUM_GENERIC_RA)
449	.Case(S: "flags", LLDB_REGNUM_GENERIC_FLAGS)
450	.Case(S: "arg1", LLDB_REGNUM_GENERIC_ARG1)
451	.Case(S: "arg2", LLDB_REGNUM_GENERIC_ARG2)
452	.Case(S: "arg3", LLDB_REGNUM_GENERIC_ARG3)
453	.Case(S: "arg4", LLDB_REGNUM_GENERIC_ARG4)
454	.Case(S: "arg5", LLDB_REGNUM_GENERIC_ARG5)
455	.Case(S: "arg6", LLDB_REGNUM_GENERIC_ARG6)
456	.Case(S: "arg7", LLDB_REGNUM_GENERIC_ARG7)
457	.Case(S: "arg8", LLDB_REGNUM_GENERIC_ARG8)
458	.Case(S: "tp", LLDB_REGNUM_GENERIC_TP)
459	.Default(LLDB_INVALID_REGNUM);
460	return result;
461	}
462
463	void Args::EncodeEscapeSequences(const char *src, std::string &dst) {
464	dst.clear();
465	if (src) {
466	for (const char p = src; p != `'\0'`; ++p) {
467	size_t non_special_chars = ::strcspn(s: p, reject: "\\");
468	if (non_special_chars > `0`) {
469	dst.append(s: p, n: non_special_chars);
470	p += non_special_chars;
471	if (*p == `'\0'`)
472	break;
473	}
474
475	if (*p == `'\\'`) {
476	++p; // skip the slash
477	switch (*p) {
478	case `'a'`:
479	dst.append(n: `1`, c: `'\a'`);
480	break;
481	case `'b'`:
482	dst.append(n: `1`, c: `'\b'`);
483	break;
484	case `'f'`:
485	dst.append(n: `1`, c: `'\f'`);
486	break;
487	case `'n'`:
488	dst.append(n: `1`, c: `'\n'`);
489	break;
490	case `'r'`:
491	dst.append(n: `1`, c: `'\r'`);
492	break;
493	case `'t'`:
494	dst.append(n: `1`, c: `'\t'`);
495	break;
496	case `'v'`:
497	dst.append(n: `1`, c: `'\v'`);
498	break;
499	case `'\\'`:
500	dst.append(n: `1`, c: `'\\'`);
501	break;
502	case `'\''`:
503	dst.append(n: `1`, c: `'\''`);
504	break;
505	case `'"'`:
506	dst.append(n: `1`, c: `'"'`);
507	break;
508	case `'0'`:
509	// 1 to 3 octal chars
510	{
511	// Make a string that can hold onto the initial zero char, up to 3
512	// octal digits, and a terminating NULL.
513	char oct_str[`5`] = {`'\0'`, `'\0'`, `'\0'`, `'\0'`, `'\0'`};
514
515	int i;
516	for (i = `0`; (p[i] >= `'0'` && p[i] <= `'7'`) && i < `4`; ++i)
517	oct_str[i] = p[i];
518
519	// We don't want to consume the last octal character since the main
520	// for loop will do this for us, so we advance p by one less than i
521	// (even if i is zero)
522	p += i - `1`;
523	unsigned long octal_value = ::strtoul(nptr: oct_str, endptr: nullptr, base: `8`);
524	if (octal_value <= UINT8_MAX) {
525	dst.append(n: `1`, c: static_cast<char>(octal_value));
526	}
527	}
528	break;
529
530	case `'x'`:
531	// hex number in the format
532	if (isxdigit(p[`1`])) {
533	++p; // Skip the 'x'
534
535	// Make a string that can hold onto two hex chars plus a
536	// NULL terminator
537	char hex_str[`3`] = {*p, `'\0'`, `'\0'`};
538	if (isxdigit(p[`1`])) {
539	++p; // Skip the first of the two hex chars
540	hex_str[`1`] = *p;
541	}
542
543	unsigned long hex_value = strtoul(nptr: hex_str, endptr: nullptr, base: `16`);
544	if (hex_value <= UINT8_MAX)
545	dst.append(n: `1`, c: static_cast<char>(hex_value));
546	} else {
547	dst.append(n: `1`, c: `'x'`);
548	}
549	break;
550
551	default:
552	// Just desensitize any other character by just printing what came
553	// after the '\'
554	dst.append(n: `1`, c: *p);
555	break;
556	}
557	}
558	}
559	}
560	}
561
562	void Args::ExpandEscapedCharacters(const char *src, std::string &dst) {
563	dst.clear();
564	if (src) {
565	for (const char p = src; p != `'\0'`; ++p) {
566	if (llvm::isPrint(C: *p))
567	dst.append(n: `1`, c: *p);
568	else {
569	switch (*p) {
570	case `'\a'`:
571	dst.append(s: "\\a");
572	break;
573	case `'\b'`:
574	dst.append(s: "\\b");
575	break;
576	case `'\f'`:
577	dst.append(s: "\\f");
578	break;
579	case `'\n'`:
580	dst.append(s: "\\n");
581	break;
582	case `'\r'`:
583	dst.append(s: "\\r");
584	break;
585	case `'\t'`:
586	dst.append(s: "\\t");
587	break;
588	case `'\v'`:
589	dst.append(s: "\\v");
590	break;
591	case `'\''`:
592	dst.append(s: "\\'");
593	break;
594	case `'"'`:
595	dst.append(s: "\\\"");
596	break;
597	case `'\\'`:
598	dst.append(s: "\\\\");
599	break;
600	default: {
601	// Just encode as octal
602	dst.append(s: "\\0");
603	char octal_str[`32`];
604	snprintf(s: octal_str, maxlen: sizeof(octal_str), format: "%o", *p);
605	dst.append(s: octal_str);
606	} break;
607	}
608	}
609	}
610	}
611	}
612
613	std::string Args::EscapeLLDBCommandArgument(const std::string &arg,
614	char quote_char) {
615	const char chars_to_escape = nullptr*;
616	switch (quote_char) {
617	case `'\0'`:
618	chars_to_escape = " \t\\'\"`";
619	break;
620	case `'"'`:
621	chars_to_escape = "$\"`\\";
622	break;
623	case '`':
624	case `'\''`:
625	return arg;
626	default:
627	assert(false && "Unhandled quote character");
628	return arg;
629	}
630
631	std::string res;
632	res.reserve(res_arg: arg.size());
633	for (char c : arg) {
634	if (::strchr(s: chars_to_escape, c: c))
635	res.push_back(c: `'\\'`);
636	res.push_back(c: c);
637	}
638	return res;
639	}
640
641	OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) {
642	SetFromString(arg_string);
643	}
644
645	void OptionsWithRaw::SetFromString(llvm::StringRef arg_string) {
646	const llvm::StringRef original_args = arg_string;
647
648	size_t shift;
649	arg_string = ltrimForArgs(str: arg_string, shift);
650	std::string arg;
651	char quote;
652
653	// If the string doesn't start with a dash, we just have no options and just
654	// a raw part.
655	if (!arg_string.starts_with(Prefix: "-")) {
656	m_suffix = std::string (original_args);
657	return;
658	}
659
660	bool found_suffix = false;
661	while (!arg_string.empty()) {
662	// The length of the prefix before parsing.
663	std::size_t prev_prefix_length = original_args.size() - arg_string.size();
664
665	// Parse the next argument from the remaining string.
666	std::tie(args&: arg, args&: quote, args&: arg_string) = ParseSingleArgument(command: arg_string);
667
668	// If we get an unquoted '--' argument, then we reached the suffix part
669	// of the command.
670	Args::ArgEntry entry(arg, quote, std::nullopt);
671	if (!entry.IsQuoted() && arg == "--") {
672	// The remaining line is the raw suffix, and the line we parsed so far
673	// needs to be interpreted as arguments.
674	m_has_args = true;
675	m_suffix = std::string (arg_string);
676	found_suffix = true;
677
678	// The length of the prefix after parsing.
679	std::size_t prefix_length = original_args.size() - arg_string.size();
680
681	// Take the string we know contains all the arguments and actually parse
682	// it as proper arguments.
683	llvm::StringRef prefix = original_args.take_front(N: prev_prefix_length);
684	m_args = Args (prefix);
685	m_arg_string = prefix;
686
687	// We also record the part of the string that contains the arguments plus
688	// the delimiter.
689	m_arg_string_with_delimiter = original_args.take_front(N: prefix_length);
690
691	// As the rest of the string became the raw suffix, we are done here.
692	break;
693	}
694
695	arg_string = ltrimForArgs(str: arg_string, shift);
696	}
697
698	// If we didn't find a suffix delimiter, the whole string is the raw suffix.
699	if (!found_suffix)
700	m_suffix = std::string (original_args);
701	}
702

source code of lldb/source/Utility/Args.cpp