StringRef.h source code [llvm/include/llvm/ADT/StringRef.h]

//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_ADT_STRINGREF_H
10	#define LLVM_ADT_STRINGREF_H
11
12	#include "llvm/ADT/DenseMapInfo.h"
13	#include "llvm/ADT/STLFunctionalExtras.h"
14	#include "llvm/ADT/iterator_range.h"
15	#include "llvm/Support/Compiler.h"
16	#include <algorithm>
17	#include <cassert>
18	#include <cstddef>
19	#include <cstring>
20	#include <limits>
21	#include <string>
22	#include <string_view>
23	#include <type_traits>
24	#include <utility>
25
26	namespace llvm {
27
// Forward declarations: these types are only named (by reference, pointer or
// in declarations) in this header, so their full definitions are not needed.
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
class StringRef;

/// Helper functions for StringRef::getAsInteger.
///
/// Parse \p Str as an unsigned integer in the given \p Radix and store the
/// value in \p Result. StringRef::getAsInteger treats a \c true return value
/// as failure.
bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                          unsigned long long &Result);

/// Signed counterpart of getAsUnsignedInteger; a \c true return value is
/// treated as failure by StringRef::getAsInteger.
bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

/// Helper for StringRef::consumeInteger (unsigned case). \p Str is taken by
/// reference so the helper can update it; StringRef::consumeInteger treats a
/// \c true return value as failure.
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                            unsigned long long &Result);

/// Signed counterpart of consumeUnsignedInteger.
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
42
43	/// StringRef - Represent a constant reference to a string, i.e. a character
44	/// array and a length, which need not be null terminated.
45	///
46	/// This class does not own the string data, it is expected to be used in
47	/// situations where the character data resides in some other buffer, whose
48	/// lifetime extends past that of the StringRef. For this reason, it is not in
49	/// general safe to store a StringRef.
50	class LLVM_GSL_POINTER StringRef {
51	public:
52	static constexpr size_t npos = ~size_t(`0`);
53
54	using iterator = const char *;
55	using const_iterator = const char *;
56	using size_type = size_t;
57
58	private:
59	/// The start of the string, in an external buffer.
60	const char Data = nullptr*;
61
62	/// The length of the string.
63	size_t Length = `0`;
64
65	// Workaround memcmp issue with null pointers (undefined behavior)
66	// by providing a specialized version
67	static int compareMemory(const char Lhs, const* char *Rhs, size_t Length) {
68	if (Length == `0`) { return `0`; }
69	return ::memcmp(s1: Lhs,s2: Rhs,n: Length);
70	}
71
72	public:
73	/// @name Constructors
74	/// @{
75
76	/// Construct an empty string ref.
77	/implicit/ StringRef() = default;
78
79	/// Disable conversion from nullptr. This prevents things like
80	/// if (S == nullptr)
81	StringRef(std::nullptr_t) = delete;
82
83	/// Construct a string ref from a cstring.
84	/implicit/ constexpr StringRef(const char *Str)
85	: Data(Str), Length(Str ?
86	// GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
87	#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
88	__builtin_strlen(Str)
89	#else
90	std::char_traits<char>::length(s: Str)
91	#endif
92	: `0`) {
93	}
94
95	/// Construct a string ref from a pointer and length.
96	/implicit/ constexpr StringRef(const char *data, size_t length)
97	: Data(data), Length(length) {}
98
99	/// Construct a string ref from an std::string.
100	/implicit/ StringRef(const std::string &Str)
101	: Data(Str.data()), Length(Str.length()) {}
102
103	/// Construct a string ref from an std::string_view.
104	/implicit/ constexpr StringRef(std::string_view Str)
105	: Data(Str.data()), Length(Str.size()) {}
106
107	/// @}
108	/// @name Iterators
109	/// @{
110
111	iterator begin() const { return Data; }
112
113	iterator end() const { return Data + Length; }
114
115	const unsigned char bytes_begin() const* {
116	return reinterpret_cast<const unsigned char *>(begin());
117	}
118	const unsigned char bytes_end() const* {
119	return reinterpret_cast<const unsigned char *>(end());
120	}
121	iterator_range<const unsigned char > bytes() const* {
122	return make_range(x: bytes_begin(), y: bytes_end());
123	}
124
125	/// @}
126	/// @name String Operations
127	/// @{
128
129	/// data - Get a pointer to the start of the string (which may not be null
130	/// terminated).
131	[[nodiscard]] constexpr const char data() const* { return Data; }
132
133	/// empty - Check if the string is empty.
134	[[nodiscard]] constexpr bool empty() const { return Length == `0`; }
135
136	/// size - Get the string size.
137	[[nodiscard]] constexpr size_t size() const { return Length; }
138
139	/// front - Get the first character in the string.
140	[[nodiscard]] char front() const {
141	assert(!empty());
142	return Data[`0`];
143	}
144
145	/// back - Get the last character in the string.
146	[[nodiscard]] char back() const {
147	assert(!empty());
148	return Data[Length-`1`];
149	}
150
151	// copy - Allocate copy in Allocator and return StringRef to it.
152	template <typename Allocator>
153	[[nodiscard]] StringRef copy(Allocator &A) const {
154	// Don't request a length 0 copy from the allocator.
155	if (empty())
156	return StringRef ();
157	char S = A.template Allocate<char*>(Length);
158	std::copy(begin(), end(), S);
159	return StringRef (S, Length);
160	}
161
162	/// equals - Check for string equality, this is more efficient than
163	/// compare() when the relative ordering of inequal strings isn't needed.
164	[[nodiscard]] bool equals(StringRef RHS) const {
165	return (Length == RHS.Length &&
166	compareMemory(Lhs: Data, Rhs: RHS.Data, Length: RHS.Length) == `0`);
167	}
168
169	/// Check for string equality, ignoring case.
170	[[nodiscard]] bool equals_insensitive(StringRef RHS) const {
171	return Length == RHS.Length && compare_insensitive(RHS) == `0`;
172	}
173
174	/// compare - Compare two strings; the result is negative, zero, or positive
175	/// if this string is lexicographically less than, equal to, or greater than
176	/// the \p RHS.
177	[[nodiscard]] int compare(StringRef RHS) const {
178	// Check the prefix for a mismatch.
179	if (int Res = compareMemory(Lhs: Data, Rhs: RHS.Data, Length: std::min(a: Length, b: RHS.Length)))
180	return Res < `0` ? -`1` : `1`;
181
182	// Otherwise the prefixes match, so we only need to check the lengths.
183	if (Length == RHS.Length)
184	return `0`;
185	return Length < RHS.Length ? -`1` : `1`;
186	}
187
188	/// Compare two strings, ignoring case.
189	[[nodiscard]] int compare_insensitive(StringRef RHS) const;
190
191	/// compare_numeric - Compare two strings, treating sequences of digits as
192	/// numbers.
193	[[nodiscard]] int compare_numeric(StringRef RHS) const;
194
195	/// Determine the edit distance between this string and another
196	/// string.
197	///
198	/// \param Other the string to compare this string against.
199	///
200	/// \param AllowReplacements whether to allow character
201	/// replacements (change one character into another) as a single
202	/// operation, rather than as two operations (an insertion and a
203	/// removal).
204	///
205	/// \param MaxEditDistance If non-zero, the maximum edit distance that
206	/// this routine is allowed to compute. If the edit distance will exceed
207	/// that maximum, returns \c MaxEditDistance+1.
208	///
209	/// \returns the minimum number of character insertions, removals,
210	/// or (if \p AllowReplacements is \c true) replacements needed to
211	/// transform one of the given strings into the other. If zero,
212	/// the strings are identical.
213	[[nodiscard]] unsigned edit_distance(StringRef Other,
214	bool AllowReplacements = true,
215	unsigned MaxEditDistance = `0`) const;
216
217	[[nodiscard]] unsigned
218	edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
219	unsigned MaxEditDistance = `0`) const;
220
221	/// str - Get the contents as an std::string.
222	[[nodiscard]] std::string str() const {
223	if (!Data) return std::string ();
224	return std::string (Data, Length);
225	}
226
227	/// @}
228	/// @name Operator Overloads
229	/// @{
230
231	[[nodiscard]] char operator[](size_t Index) const {
232	assert(Index < Length && "Invalid index!");
233	return Data[Index];
234	}
235
236	/// Disallow accidental assignment from a temporary std::string.
237	///
238	/// The declaration here is extra complicated so that `stringRef = {}`
239	/// and `stringRef = "abc"` continue to select the move assignment operator.
240	template <typename T>
241	std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
242	operator=(T &&Str) = delete;
243
244	/// @}
245	/// @name Type Conversions
246	/// @{
247
248	constexpr operator std::string_view() const {
249	return std::string_view (data(), size());
250	}
251
252	/// @}
253	/// @name String Predicates
254	/// @{
255
256	/// Check if this string starts with the given \p Prefix.
257	[[nodiscard]] bool starts_with(StringRef Prefix) const {
258	return Length >= Prefix.Length &&
259	compareMemory(Lhs: Data, Rhs: Prefix.Data, Length: Prefix.Length) == `0`;
260	}
261	[[nodiscard]] LLVM_DEPRECATED(
262	"Use starts_with instead",
263	"starts_with") bool startswith(StringRef Prefix) const {
264	return starts_with(Prefix);
265	}
266
267	/// Check if this string starts with the given \p Prefix, ignoring case.
268	[[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const;
269
270	/// Check if this string ends with the given \p Suffix.
271	[[nodiscard]] bool ends_with(StringRef Suffix) const {
272	return Length >= Suffix.Length &&
273	compareMemory(Lhs: end() - Suffix.Length, Rhs: Suffix.Data, Length: Suffix.Length) ==
274	`0`;
275	}
276	[[nodiscard]] LLVM_DEPRECATED(
277	"Use ends_with instead",
278	"ends_with") bool endswith(StringRef Suffix) const {
279	return ends_with(Suffix);
280	}
281
282	/// Check if this string ends with the given \p Suffix, ignoring case.
283	[[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const;
284
285	/// @}
286	/// @name String Searching
287	/// @{
288
289	/// Search for the first character \p C in the string.
290	///
291	/// \returns The index of the first occurrence of \p C, or npos if not
292	/// found.
293	[[nodiscard]] size_t find(char C, size_t From = `0`) const {
294	return std::string_view(*this).find(c: C, pos: From);
295	}
296
297	/// Search for the first character \p C in the string, ignoring case.
298	///
299	/// \returns The index of the first occurrence of \p C, or npos if not
300	/// found.
301	[[nodiscard]] size_t find_insensitive(char C, size_t From = `0`) const;
302
303	/// Search for the first character satisfying the predicate \p F
304	///
305	/// \returns The index of the first character satisfying \p F starting from
306	/// \p From, or npos if not found.
307	[[nodiscard]] size_t find_if(function_ref<bool(char)> F,
308	size_t From = `0`) const {
309	StringRef S = drop_front(N: From);
310	while (!S.empty()) {
311	if (F (S.front()))
312	return size() - S.size();
313	S = S.drop_front();
314	}
315	return npos;
316	}
317
318	/// Search for the first character not satisfying the predicate \p F
319	///
320	/// \returns The index of the first character not satisfying \p F starting
321	/// from \p From, or npos if not found.
322	[[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
323	size_t From = `0`) const {
324	return find_if(F: [F](char c) { return !F (c); }, From);
325	}
326
327	/// Search for the first string \p Str in the string.
328	///
329	/// \returns The index of the first occurrence of \p Str, or npos if not
330	/// found.
331	[[nodiscard]] size_t find(StringRef Str, size_t From = `0`) const;
332
333	/// Search for the first string \p Str in the string, ignoring case.
334	///
335	/// \returns The index of the first occurrence of \p Str, or npos if not
336	/// found.
337	[[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = `0`) const;
338
339	/// Search for the last character \p C in the string.
340	///
341	/// \returns The index of the last occurrence of \p C, or npos if not
342	/// found.
343	[[nodiscard]] size_t rfind(char C, size_t From = npos) const {
344	size_t I = std::min(a: From, b: Length);
345	while (I) {
346	--I;
347	if (Data[I] == C)
348	return I;
349	}
350	return npos;
351	}
352
353	/// Search for the last character \p C in the string, ignoring case.
354	///
355	/// \returns The index of the last occurrence of \p C, or npos if not
356	/// found.
357	[[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const;
358
359	/// Search for the last string \p Str in the string.
360	///
361	/// \returns The index of the last occurrence of \p Str, or npos if not
362	/// found.
363	[[nodiscard]] size_t rfind(StringRef Str) const;
364
365	/// Search for the last string \p Str in the string, ignoring case.
366	///
367	/// \returns The index of the last occurrence of \p Str, or npos if not
368	/// found.
369	[[nodiscard]] size_t rfind_insensitive(StringRef Str) const;
370
371	/// Find the first character in the string that is \p C, or npos if not
372	/// found. Same as find.
373	[[nodiscard]] size_t find_first_of(char C, size_t From = `0`) const {
374	return find(C, From);
375	}
376
377	/// Find the first character in the string that is in \p Chars, or npos if
378	/// not found.
379	///
380	/// Complexity: O(size() + Chars.size())
381	[[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = `0`) const;
382
383	/// Find the first character in the string that is not \p C or npos if not
384	/// found.
385	[[nodiscard]] size_t find_first_not_of(char C, size_t From = `0`) const;
386
387	/// Find the first character in the string that is not in the string
388	/// \p Chars, or npos if not found.
389	///
390	/// Complexity: O(size() + Chars.size())
391	[[nodiscard]] size_t find_first_not_of(StringRef Chars,
392	size_t From = `0`) const;
393
394	/// Find the last character in the string that is \p C, or npos if not
395	/// found.
396	[[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
397	return rfind(C, From);
398	}
399
400	/// Find the last character in the string that is in \p C, or npos if not
401	/// found.
402	///
403	/// Complexity: O(size() + Chars.size())
404	[[nodiscard]] size_t find_last_of(StringRef Chars,
405	size_t From = npos) const;
406
407	/// Find the last character in the string that is not \p C, or npos if not
408	/// found.
409	[[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const;
410
411	/// Find the last character in the string that is not in \p Chars, or
412	/// npos if not found.
413	///
414	/// Complexity: O(size() + Chars.size())
415	[[nodiscard]] size_t find_last_not_of(StringRef Chars,
416	size_t From = npos) const;
417
418	/// Return true if the given string is a substring of this, and false*
419	/// otherwise.
420	[[nodiscard]] bool contains(StringRef Other) const {
421	return find(Str: Other) != npos;
422	}
423
424	/// Return true if the given character is contained in this, and false*
425	/// otherwise.
426	[[nodiscard]] bool contains(char C) const {
427	return find_first_of(C) != npos;
428	}
429
430	/// Return true if the given string is a substring of this, and false*
431	/// otherwise.
432	[[nodiscard]] bool contains_insensitive(StringRef Other) const {
433	return find_insensitive(Str: Other) != npos;
434	}
435
436	/// Return true if the given character is contained in this, and false*
437	/// otherwise.
438	[[nodiscard]] bool contains_insensitive(char C) const {
439	return find_insensitive(C) != npos;
440	}
441
442	/// @}
443	/// @name Helpful Algorithms
444	/// @{
445
446	/// Return the number of occurrences of \p C in the string.
447	[[nodiscard]] size_t count(char C) const {
448	size_t Count = `0`;
449	for (size_t I = `0`; I != Length; ++I)
450	if (Data[I] == C)
451	++Count;
452	return Count;
453	}
454
455	/// Return the number of non-overlapped occurrences of \p Str in
456	/// the string.
457	size_t count(StringRef Str) const;
458
459	/// Parse the current string as an integer of the specified radix. If
460	/// \p Radix is specified as zero, this does radix autosensing using
461	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
462	///
463	/// If the string is invalid or if only a subset of the string is valid,
464	/// this returns true to signify the error. The string is considered
465	/// erroneous if empty or if it overflows T.
466	template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
467	if constexpr (std::numeric_limits<T>::is_signed) {
468	long long LLVal;
469	if (getAsSignedInteger(Str: *this, Radix, Result&: LLVal) \|\|
470	static_cast<T>(LLVal) != LLVal)
471	return true;
472	Result = LLVal;
473	} else {
474	unsigned long long ULLVal;
475	// The additional cast to unsigned long long is required to avoid the
476	// Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
477	// 'unsigned __int64' when instantiating getAsInteger with T = bool.
478	if (getAsUnsignedInteger(Str: *this, Radix, Result&: ULLVal) \|\|
479	static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
480	return true;
481	Result = ULLVal;
482	}
483	return false;
484	}
485
486	/// Parse the current string as an integer of the specified radix. If
487	/// \p Radix is specified as zero, this does radix autosensing using
488	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
489	///
490	/// If the string does not begin with a number of the specified radix,
491	/// this returns true to signify the error. The string is considered
492	/// erroneous if empty or if it overflows T.
493	/// The portion of the string representing the discovered numeric value
494	/// is removed from the beginning of the string.
495	template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
496	if constexpr (std::numeric_limits<T>::is_signed) {
497	long long LLVal;
498	if (consumeSignedInteger(Str&: *this, Radix, Result&: LLVal) \|\|
499	static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
500	return true;
501	Result = LLVal;
502	} else {
503	unsigned long long ULLVal;
504	if (consumeUnsignedInteger(Str&: *this, Radix, Result&: ULLVal) \|\|
505	static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
506	return true;
507	Result = ULLVal;
508	}
509	return false;
510	}
511
512	/// Parse the current string as an integer of the specified \p Radix, or of
513	/// an autosensed radix if the \p Radix given is 0. The current value in
514	/// \p Result is discarded, and the storage is changed to be wide enough to
515	/// store the parsed integer.
516	///
517	/// \returns true if the string does not solely consist of a valid
518	/// non-empty number in the appropriate base.
519	///
520	/// APInt::fromString is superficially similar but assumes the
521	/// string is well-formed in the given radix.
522	bool getAsInteger(unsigned Radix, APInt &Result) const;
523
524	/// Parse the current string as an integer of the specified \p Radix. If
525	/// \p Radix is specified as zero, this does radix autosensing using
526	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
527	///
528	/// If the string does not begin with a number of the specified radix,
529	/// this returns true to signify the error. The string is considered
530	/// erroneous if empty.
531	/// The portion of the string representing the discovered numeric value
532	/// is removed from the beginning of the string.
533	bool consumeInteger(unsigned Radix, APInt &Result);
534
535	/// Parse the current string as an IEEE double-precision floating
536	/// point value. The string must be a well-formed double.
537	///
538	/// If \p AllowInexact is false, the function will fail if the string
539	/// cannot be represented exactly. Otherwise, the function only fails
540	/// in case of an overflow or underflow, or an invalid floating point
541	/// representation.
542	bool getAsDouble(double &Result, bool AllowInexact = true) const;
543
544	/// @}
545	/// @name String Operations
546	/// @{
547
548	// Convert the given ASCII string to lowercase.
549	[[nodiscard]] std::string lower() const;
550
551	/// Convert the given ASCII string to uppercase.
552	[[nodiscard]] std::string upper() const;
553
554	/// @}
555	/// @name Substring Operations
556	/// @{
557
558	/// Return a reference to the substring from [Start, Start + N).
559	///
560	/// \param Start The index of the starting character in the substring; if
561	/// the index is npos or greater than the length of the string then the
562	/// empty substring will be returned.
563	///
564	/// \param N The number of characters to included in the substring. If N
565	/// exceeds the number of characters remaining in the string, the string
566	/// suffix (starting with \p Start) will be returned.
567	[[nodiscard]] constexpr StringRef substr(size_t Start,
568	size_t N = npos) const {
569	Start = std::min(a: Start, b: Length);
570	return StringRef (Data + Start, std::min(a: N, b: Length - Start));
571	}
572
573	/// Return a StringRef equal to 'this' but with only the first \p N
574	/// elements remaining. If \p N is greater than the length of the
575	/// string, the entire string is returned.
576	[[nodiscard]] StringRef take_front(size_t N = `1`) const {
577	if (N >= size())
578	return *this;
579	return drop_back(N: size() - N);
580	}
581
582	/// Return a StringRef equal to 'this' but with only the last \p N
583	/// elements remaining. If \p N is greater than the length of the
584	/// string, the entire string is returned.
585	[[nodiscard]] StringRef take_back(size_t N = `1`) const {
586	if (N >= size())
587	return *this;
588	return drop_front(N: size() - N);
589	}
590
591	/// Return the longest prefix of 'this' such that every character
592	/// in the prefix satisfies the given predicate.
593	[[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
594	return substr(Start: `0`, N: find_if_not(F));
595	}
596
597	/// Return the longest prefix of 'this' such that no character in
598	/// the prefix satisfies the given predicate.
599	[[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
600	return substr(Start: `0`, N: find_if(F));
601	}
602
603	/// Return a StringRef equal to 'this' but with the first \p N elements
604	/// dropped.
605	[[nodiscard]] StringRef drop_front(size_t N = `1`) const {
606	assert(size() >= N && "Dropping more elements than exist");
607	return substr(Start: N);
608	}
609
610	/// Return a StringRef equal to 'this' but with the last \p N elements
611	/// dropped.
612	[[nodiscard]] StringRef drop_back(size_t N = `1`) const {
613	assert(size() >= N && "Dropping more elements than exist");
614	return substr(Start: `0`, N: size()-N);
615	}
616
617	/// Return a StringRef equal to 'this', but with all characters satisfying
618	/// the given predicate dropped from the beginning of the string.
619	[[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
620	return substr(Start: find_if_not(F));
621	}
622
623	/// Return a StringRef equal to 'this', but with all characters not
624	/// satisfying the given predicate dropped from the beginning of the string.
625	[[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
626	return substr(Start: find_if(F));
627	}
628
629	/// Returns true if this StringRef has the given prefix and removes that
630	/// prefix.
631	bool consume_front(StringRef Prefix) {
632	if (!starts_with(Prefix))
633	return false;
634
635	*this = substr(Start: Prefix.size());
636	return true;
637	}
638
639	/// Returns true if this StringRef has the given prefix, ignoring case,
640	/// and removes that prefix.
641	bool consume_front_insensitive(StringRef Prefix) {
642	if (!starts_with_insensitive(Prefix))
643	return false;
644
645	*this = substr(Start: Prefix.size());
646	return true;
647	}
648
649	/// Returns true if this StringRef has the given suffix and removes that
650	/// suffix.
651	bool consume_back(StringRef Suffix) {
652	if (!ends_with(Suffix))
653	return false;
654
655	*this = substr(Start: `0`, N: size() - Suffix.size());
656	return true;
657	}
658
659	/// Returns true if this StringRef has the given suffix, ignoring case,
660	/// and removes that suffix.
661	bool consume_back_insensitive(StringRef Suffix) {
662	if (!ends_with_insensitive(Suffix))
663	return false;
664
665	*this = substr(Start: `0`, N: size() - Suffix.size());
666	return true;
667	}
668
669	/// Return a reference to the substring from [Start, End).
670	///
671	/// \param Start The index of the starting character in the substring; if
672	/// the index is npos or greater than the length of the string then the
673	/// empty substring will be returned.
674	///
675	/// \param End The index following the last character to include in the
676	/// substring. If this is npos or exceeds the number of characters
677	/// remaining in the string, the string suffix (starting with \p Start)
678	/// will be returned. If this is less than \p Start, an empty string will
679	/// be returned.
680	[[nodiscard]] StringRef slice(size_t Start, size_t End) const {
681	Start = std::min(a: Start, b: Length);
682	End = std::clamp(val: End, lo: Start, hi: Length);
683	return StringRef (Data + Start, End - Start);
684	}
685
686	/// Split into two substrings around the first occurrence of a separator
687	/// character.
688	///
689	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
690	/// such that (this == LHS + Separator + RHS) is true and RHS is*
691	/// maximal. If \p Separator is not in the string, then the result is a
692	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
693	///
694	/// \param Separator The character to split on.
695	/// \returns The split substrings.
696	[[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
697	return split(Separator: StringRef (&Separator, `1`));
698	}
699
700	/// Split into two substrings around the first occurrence of a separator
701	/// string.
702	///
703	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
704	/// such that (this == LHS + Separator + RHS) is true and RHS is*
705	/// maximal. If \p Separator is not in the string, then the result is a
706	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
707	///
708	/// \param Separator - The string to split on.
709	/// \return - The split substrings.
710	[[nodiscard]] std::pair<StringRef, StringRef>
711	split(StringRef Separator) const {
712	size_t Idx = find(Str: Separator);
713	if (Idx == npos)
714	return std::make_pair(x: *this, y: StringRef ());
715	return std::make_pair(x: slice(Start: `0`, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos));
716	}
717
718	/// Split into two substrings around the last occurrence of a separator
719	/// string.
720	///
721	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
722	/// such that (this == LHS + Separator + RHS) is true and RHS is*
723	/// minimal. If \p Separator is not in the string, then the result is a
724	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
725	///
726	/// \param Separator - The string to split on.
727	/// \return - The split substrings.
728	[[nodiscard]] std::pair<StringRef, StringRef>
729	rsplit(StringRef Separator) const {
730	size_t Idx = rfind(Str: Separator);
731	if (Idx == npos)
732	return std::make_pair(x: *this, y: StringRef ());
733	return std::make_pair(x: slice(Start: `0`, End: Idx), y: slice(Start: Idx + Separator.size(), End: npos));
734	}
735
736	/// Split into substrings around the occurrences of a separator string.
737	///
738	/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
739	/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
740	/// elements are added to A.
741	/// If \p KeepEmpty is false, empty strings are not added to \p A. They
742	/// still count when considering \p MaxSplit
743	/// An useful invariant is that
744	/// Separator.join(A) == this if MaxSplit == -1 and KeepEmpty == true*
745	///
746	/// \param A - Where to put the substrings.
747	/// \param Separator - The string to split on.
748	/// \param MaxSplit - The maximum number of times the string is split.
749	/// \param KeepEmpty - True if empty substring should be added.
750	void split(SmallVectorImpl<StringRef> &A,
751	StringRef Separator, int MaxSplit = -`1`,
752	bool KeepEmpty = true) const;
753
754	/// Split into substrings around the occurrences of a separator character.
755	///
756	/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
757	/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
758	/// elements are added to A.
759	/// If \p KeepEmpty is false, empty strings are not added to \p A. They
760	/// still count when considering \p MaxSplit
761	/// An useful invariant is that
762	/// Separator.join(A) == this if MaxSplit == -1 and KeepEmpty == true*
763	///
764	/// \param A - Where to put the substrings.
765	/// \param Separator - The string to split on.
766	/// \param MaxSplit - The maximum number of times the string is split.
767	/// \param KeepEmpty - True if empty substring should be added.
768	void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -`1`,
769	bool KeepEmpty = true) const;
770
771	/// Split into two substrings around the last occurrence of a separator
772	/// character.
773	///
774	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
775	/// such that (this == LHS + Separator + RHS) is true and RHS is*
776	/// minimal. If \p Separator is not in the string, then the result is a
777	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
778	///
779	/// \param Separator - The character to split on.
780	/// \return - The split substrings.
781	[[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
782	return rsplit(Separator: StringRef (&Separator, `1`));
783	}
784
785	/// Return string with consecutive \p Char characters starting from the
786	/// the left removed.
787	[[nodiscard]] StringRef ltrim(char Char) const {
788	return drop_front(N: std::min(a: Length, b: find_first_not_of(C: Char)));
789	}
790
791	/// Return string with consecutive characters in \p Chars starting from
792	/// the left removed.
793	[[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
794	return drop_front(N: std::min(a: Length, b: find_first_not_of(Chars)));
795	}
796
797	/// Return string with consecutive \p Char characters starting from the
798	/// right removed.
799	[[nodiscard]] StringRef rtrim(char Char) const {
800	return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(C: Char) + `1`));
801	}
802
803	/// Return string with consecutive characters in \p Chars starting from
804	/// the right removed.
805	[[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
806	return drop_back(N: Length - std::min(a: Length, b: find_last_not_of(Chars) + `1`));
807	}
808
809	/// Return string with consecutive \p Char characters starting from the
810	/// left and right removed.
811	[[nodiscard]] StringRef trim(char Char) const {
812	return ltrim(Char).rtrim(Char);
813	}
814
815	/// Return string with consecutive characters in \p Chars starting from
816	/// the left and right removed.
817	[[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
818	return ltrim(Chars).rtrim(Chars);
819	}
820
821	/// Detect the line ending style of the string.
822	///
823	/// If the string contains a line ending, return the line ending character
824	/// sequence that is detected. Otherwise return '\n' for unix line endings.
825	///
826	/// \return - The line ending character sequence.
827	[[nodiscard]] StringRef detectEOL() const {
828	size_t Pos = find(C: `'\r'`);
829	if (Pos == npos) {
830	// If there is no carriage return, assume unix
831	return "\n";
832	}
833	if (Pos + `1` < Length && Data[Pos + `1`] == `'\n'`)
834	return "\r\n"; // Windows
835	if (Pos > `0` && Data[Pos - `1`] == `'\n'`)
836	return "\n\r"; // You monster!
837	return "\r"; // Classic Mac
838	}
839	/// @}
840	};
841
842	/// A wrapper around a string literal that serves as a proxy for constructing
843	/// global tables of StringRefs with the length computed at compile time.
844	/// In order to avoid the invocation of a global constructor, StringLiteral
845	/// should only* be used in a constexpr context, as such:*
846	///
847	/// constexpr StringLiteral S("test");
848	///
849	class StringLiteral : public StringRef {
850	private:
851	constexpr StringLiteral(const char *Str, size_t N) : StringRef (Str, N) {
852	}
853
854	public:
855	template <size_t N>
856	constexpr StringLiteral(const char (&Str)[N])
857	#if defined(__clang__) && __has_attribute(enable_if)
858	#pragma clang diagnostic push
859	#pragma clang diagnostic ignored "-Wgcc-compat"
860	__attribute((enable_if(__builtin_strlen(Str) == N - `1`,
861	"invalid string literal")))
862	#pragma clang diagnostic pop
863	#endif
864	: StringRef(Str, N - `1`) {
865	}
866
867	// Explicit construction for strings like "foo\0bar".
868	template <size_t N>
869	static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
870	return StringLiteral(Str, N - `1`);
871	}
872	};
873
874	/// @name StringRef Comparison Operators
875	/// @{
876
877	inline bool operator==(StringRef LHS, StringRef RHS) {
878	return LHS.equals(RHS);
879	}
880
881	inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
882
883	inline bool operator<(StringRef LHS, StringRef RHS) {
884	return LHS.compare(RHS) < `0`;
885	}
886
887	inline bool operator<=(StringRef LHS, StringRef RHS) {
888	return LHS.compare(RHS) <= `0`;
889	}
890
891	inline bool operator>(StringRef LHS, StringRef RHS) {
892	return LHS.compare(RHS) > `0`;
893	}
894
895	inline bool operator>=(StringRef LHS, StringRef RHS) {
896	return LHS.compare(RHS) >= `0`;
897	}
898
899	inline std::string &operator+=(std::string &buffer, StringRef string) {
900	return buffer.append(s: string.data(), n: string.size());
901	}
902
903	/// @}
904
905	/// Compute a hash_code for a StringRef.
906	[[nodiscard]] hash_code hash_value(StringRef S);
907
908	// Provide DenseMapInfo for StringRefs.
909	template <> struct DenseMapInfo<StringRef, void> {
910	static inline StringRef getEmptyKey() {
911	return StringRef (
912	reinterpret_cast<const char >(~static_cast*<uintptr_t>(`0`)), `0`);
913	}
914
915	static inline StringRef getTombstoneKey() {
916	return StringRef (
917	reinterpret_cast<const char >(~static_cast*<uintptr_t>(`1`)), `0`);
918	}
919
920	static unsigned getHashValue(StringRef Val);
921
922	static bool isEqual(StringRef LHS, StringRef RHS) {
923	if (RHS.data() == getEmptyKey().data())
924	return LHS.data() == getEmptyKey().data();
925	if (RHS.data() == getTombstoneKey().data())
926	return LHS.data() == getTombstoneKey().data();
927	return LHS == RHS;
928	}
929	};
930
931	} // end namespace llvm
932
933	#endif // LLVM_ADT_STRINGREF_H
934

source code of llvm/include/llvm/ADT/StringRef.h