unistr.h source code [include/unicode/unistr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File unistr.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 11/11/98 stephen Changed per 11/9 code review.
16	* 04/20/99 stephen Overhauled per 4/16 code review.
17	* 11/18/99 aliu Made to inherit from Replaceable. Added method
18	* handleReplaceBetween(); other methods unchanged.
19	* 06/25/01 grhoten Remove dependency on iostream.
20	******************************************************************************
21	*/
22
23	#ifndef UNISTR_H
24	#define UNISTR_H
25
26	/**
27	* \file
28	* \brief C++ API: Unicode String
29	*/
30
31	#include "unicode/utypes.h"
32
33	#if U_SHOW_CPLUSPLUS_API
34
35	#include <cstddef>
36	#include "unicode/char16ptr.h"
37	#include "unicode/rep.h"
38	#include "unicode/std_string.h"
39	#include "unicode/stringpiece.h"
40	#include "unicode/bytestream.h"
41
42	struct UConverter; // unicode/ucnv.h
43
44	#ifndef USTRING_H
45	/**
46	* \ingroup ustring_ustrlen
47	* @param s Pointer to sequence of UChars.
48	* @return Length of sequence.
49	*/
50	U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
51	#endif
52
53	U_NAMESPACE_BEGIN
54
55	#if !UCONFIG_NO_BREAK_ITERATION
56	class BreakIterator; // unicode/brkiter.h
57	#endif
58	class Edits;
59
60	U_NAMESPACE_END
61
62	// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
63	/**
64	* Internal string case mapping function type.
65	* All error checking must be done.
66	* src and dest must not overlap.
67	* @internal
68	*/
69	typedef int32_t U_CALLCONV
70	UStringCaseMapper(int32_t caseLocale, uint32_t options,
71	#if !UCONFIG_NO_BREAK_ITERATION
72	icu::BreakIterator *iter,
73	#endif
74	char16_t *dest, int32_t destCapacity,
75	const char16_t *src, int32_t srcLength,
76	icu::Edits *edits,
77	UErrorCode &errorCode);
78
79	U_NAMESPACE_BEGIN
80
81	class Locale; // unicode/locid.h
82	class StringCharacterIterator;
83	class UnicodeStringAppendable; // unicode/appendable.h
84
85	/* The <iostream> include has been moved to unicode/ustream.h */
86
87	/**
88	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
89	* which constructs a Unicode string from an invariant-character char * string.
90	* About invariant characters see utypes.h.
91	* This constructor has no runtime dependency on conversion code and is
92	* therefore recommended over ones taking a charset name string
93	* (where the empty string "" indicates invariant-character conversion).
94	*
95	* @stable ICU 3.2
96	*/
97	#define US_INV icu::UnicodeString::kInvariant
98
99	/**
100	* Unicode String literals in C++.
101	*
102	* Note: these macros are not recommended for new code.
103	* Prior to the availability of C++11 and u"unicode string literals",
104	* these macros were provided for portability and efficiency when
105	* initializing UnicodeStrings from literals.
106	*
107	* They work only for strings that contain "invariant characters", i.e.,
108	* only latin letters, digits, and some punctuation.
109	* See utypes.h for details.
110	*
111	* The string parameter must be a C string literal.
112	* The length of the string, not including the terminating
113	* `NUL`, must be specified as a constant.
114	* @stable ICU 2.0
115	*/
116	#if !U_CHAR16_IS_TYPEDEF
117	# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
118	#else
119	# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
120	#endif
121
122	/**
123	* Unicode String literals in C++.
124	* Dependent on the platform properties, different UnicodeString
125	* constructors should be used to create a UnicodeString object from
126	* a string literal.
127	* The macros are defined for improved performance.
128	* They work only for strings that contain "invariant characters", i.e.,
129	* only latin letters, digits, and some punctuation.
130	* See utypes.h for details.
131	*
132	* The string parameter must be a C string literal.
133	* @stable ICU 2.0
134	*/
135	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136
137	/**
138	* \def UNISTR_FROM_CHAR_EXPLICIT
139	* This can be defined to be empty or "explicit".
140	* If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
141	* constructors are marked as explicit, preventing their inadvertent use.
142	* @stable ICU 49
143	*/
144	#ifndef UNISTR_FROM_CHAR_EXPLICIT
145	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146	// Auto-"explicit" in ICU library code.
147	# define UNISTR_FROM_CHAR_EXPLICIT explicit
148	# else
149	// Empty by default for source code compatibility.
150	# define UNISTR_FROM_CHAR_EXPLICIT
151	# endif
152	#endif
153
154	/**
155	* \def UNISTR_FROM_STRING_EXPLICIT
156	* This can be defined to be empty or "explicit".
157	* If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
158	* constructors are marked as explicit, preventing their inadvertent use.
159	*
160	* In particular, this helps prevent accidentally depending on ICU conversion code
161	* by passing a string literal into an API with a const UnicodeString & parameter.
162	* @stable ICU 49
163	*/
164	#ifndef UNISTR_FROM_STRING_EXPLICIT
165	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166	// Auto-"explicit" in ICU library code.
167	# define UNISTR_FROM_STRING_EXPLICIT explicit
168	# else
169	// Empty by default for source code compatibility.
170	# define UNISTR_FROM_STRING_EXPLICIT
171	# endif
172	#endif
173
174	/**
175	* \def UNISTR_OBJECT_SIZE
176	* Desired sizeof(UnicodeString) in bytes.
177	* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
178	* The object size may want to be a multiple of 16 bytes,
179	* which is a common granularity for heap allocation.
180	*
181	* Any space inside the object beyond sizeof(vtable pointer) + 2
182	* is available for storing short strings inside the object.
183	* The bigger the object, the longer a string that can be stored inside the object,
184	* without additional heap allocation.
185	*
186	* Depending on a platform's pointer size, pointer alignment requirements,
187	* and struct padding, the compiler will usually round up sizeof(UnicodeString)
188	* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
189	* to hold the fields for heap-allocated strings.
190	* Such a minimum size also ensures that the object is easily large enough
191	* to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
192	*
193	* sizeof(UnicodeString) >= 48 should work for all known platforms.
194	*
195	* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
196	* sizeof(UnicodeString) = 64 would leave space for
197	* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
198	* char16_ts stored inside the object.
199	*
200	* The minimum object size on a 64-bit machine would be
201	* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
202	* and the internal buffer would hold up to 11 char16_ts in that case.
203	*
204	* @see U16_MAX_LENGTH
205	* @stable ICU 56
206	*/
207	#ifndef UNISTR_OBJECT_SIZE
208	# define UNISTR_OBJECT_SIZE 64
209	#endif
210
211	/**
212	* UnicodeString is a string class that stores Unicode characters directly and provides
213	* similar functionality as the Java String and StringBuffer/StringBuilder classes.
214	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
215	*
216	* The UnicodeString equivalent of std::string’s clear() is remove().
217	*
218	* A UnicodeString may "alias" an external array of characters
219	* (that is, point to it, rather than own the array)
220	* whose lifetime must then at least match the lifetime of the aliasing object.
221	* This aliasing may be preserved when returning a UnicodeString by value,
222	* depending on the compiler and the function implementation,
223	* via Return Value Optimization (RVO) or the move assignment operator.
224	* (However, the copy assignment operator does not preserve aliasing.)
225	* For details see the description of storage models at the end of the class API docs
226	* and in the User Guide chapter linked from there.
227	*
228	* The UnicodeString class is not suitable for subclassing.
229	*
230	* For an overview of Unicode strings in C and C++ see the
231	* [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
232	*
233	* In ICU, a Unicode string consists of 16-bit Unicode code units.
234	* A Unicode character may be stored with either one code unit
235	* (the most common case) or with a matched pair of special code units
236	* ("surrogates"). The data type for code units is char16_t.
237	* For single-character handling, a Unicode character code point is a value
238	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
239	*
240	* Indexes and offsets into and lengths of strings always count code units, not code points.
241	* This is the same as with multi-byte char* strings in traditional string handling.
242	* Operations on partial strings typically do not test for code point boundaries.
243	* If necessary, the user needs to take care of such boundaries by testing for the code unit
244	* values or by using functions like
245	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
246	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
247	*
248	* UnicodeString methods are more lenient with regard to input parameter values
249	* than other ICU APIs. In particular:
250	* - If indexes are out of bounds for a UnicodeString object
251	* (< 0 or > length()) then they are "pinned" to the nearest boundary.
252	* - If the buffer passed to an insert/append/replace operation is owned by the
253	* target object, e.g., calling str.append(str), an extra copy may take place
254	* to ensure safety.
255	* - If primitive string pointer values (e.g., const char16_t * or char *)
256	* for input strings are nullptr, then those input string parameters are treated
257	* as if they pointed to an empty string.
258	* However, this is not the case for char * parameters for charset names
259	* or other IDs.
260	* - Most UnicodeString methods do not take a UErrorCode parameter because
261	* there are usually very few opportunities for failure other than a shortage
262	* of memory, error codes in low-level C++ string methods would be inconvenient,
263	* and the error code as the last parameter (ICU convention) would prevent
264	* the use of default parameter values.
265	* Instead, such methods set the UnicodeString into a "bogus" state
266	* (see isBogus()) if an error occurs.
267	*
268	* In string comparisons, two UnicodeString objects that are both "bogus"
269	* compare equal (to be transitive and prevent endless loops in sorting),
270	* and a "bogus" string compares less than any non-"bogus" one.
271	*
272	* Const UnicodeString methods are thread-safe. Multiple threads can use
273	* const methods on the same UnicodeString object simultaneously,
274	* but non-const methods must not be called concurrently (in multiple threads)
275	* with any other (const or non-const) methods.
276	*
277	* Similarly, const UnicodeString & parameters are thread-safe.
278	* One object may be passed in as such a parameter concurrently in multiple threads.
279	* This includes the const UnicodeString & parameters for
280	* copy construction, assignment, and cloning.
281	*
282	* UnicodeString uses several storage methods.
283	* String contents can be stored inside the UnicodeString object itself,
284	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
285	* Most of this is done transparently, but careful aliasing in particular provides
286	* significant performance improvements.
287	* Also, the internal buffer is accessible via special functions.
288	* For details see the
289	* [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
290	*
291	* @see utf.h
292	* @see CharacterIterator
293	* @stable ICU 2.0
294	*/
295	class U_COMMON_API UnicodeString : public Replaceable
296	{
297	public:
298
299	/**
300	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
301	* which constructs a Unicode string from an invariant-character char * string.
302	* Use the macro US_INV instead of the full qualification for this value.
303	*
304	* @see US_INV
305	* @stable ICU 3.2
306	*/
307	enum EInvariant {
308	/**
309	* @see EInvariant
310	* @stable ICU 3.2
311	*/
312	kInvariant
313	};
314
315	//========================================
316	// Read-only operations
317	//========================================
318
319	/* Comparison - bitwise only - for international comparison use collation */
320
321	/**
322	* Equality operator. Performs only bitwise comparison.
323	* @param text The UnicodeString to compare to this one.
324	* @return true if `text` contains the same characters as this one,
325	* false otherwise.
326	* @stable ICU 2.0
327	*/
328	inline bool operator== (const UnicodeString& text) const;
329
330	/**
331	* Inequality operator. Performs only bitwise comparison.
332	* @param text The UnicodeString to compare to this one.
333	* @return false if `text` contains the same characters as this one,
334	* true otherwise.
335	* @stable ICU 2.0
336	*/
337	inline bool operator!= (const UnicodeString& text) const;
338
339	/**
340	* Greater than operator. Performs only bitwise comparison.
341	* @param text The UnicodeString to compare to this one.
342	* @return true if the characters in this are bitwise
343	* greater than the characters in `text`, false otherwise
344	* @stable ICU 2.0
345	*/
346	inline UBool operator> (const UnicodeString& text) const;
347
348	/**
349	* Less than operator. Performs only bitwise comparison.
350	* @param text The UnicodeString to compare to this one.
351	* @return true if the characters in this are bitwise
352	* less than the characters in `text`, false otherwise
353	* @stable ICU 2.0
354	*/
355	inline UBool operator< (const UnicodeString& text) const;
356
357	/**
358	* Greater than or equal operator. Performs only bitwise comparison.
359	* @param text The UnicodeString to compare to this one.
360	* @return true if the characters in this are bitwise
361	* greater than or equal to the characters in `text`, false otherwise
362	* @stable ICU 2.0
363	*/
364	inline UBool operator>= (const UnicodeString& text) const;
365
366	/**
367	* Less than or equal operator. Performs only bitwise comparison.
368	* @param text The UnicodeString to compare to this one.
369	* @return true if the characters in this are bitwise
370	* less than or equal to the characters in `text`, false otherwise
371	* @stable ICU 2.0
372	*/
373	inline UBool operator<= (const UnicodeString& text) const;
374
375	/**
376	* Compare the characters bitwise in this UnicodeString to
377	* the characters in `text`.
378	* @param text The UnicodeString to compare to this one.
379	* @return The result of bitwise character comparison: 0 if this
380	* contains the same characters as `text`, -1 if the characters in
381	* this are bitwise less than the characters in `text`, +1 if the
382	* characters in this are bitwise greater than the characters
383	* in `text`.
384	* @stable ICU 2.0
385	*/
386	inline int8_t compare(const UnicodeString& text) const;
387
388	/**
389	* Compare the characters bitwise in the range
390	* [`start`, `start + length`) with the characters
391	* in the entire string `text`.
392	* (The parameters "start" and "length" are not applied to the other text "text".)
393	* @param start the offset at which the compare operation begins
394	* @param length the number of characters of text to compare.
395	* @param text the other text to be compared against this string.
396	* @return The result of bitwise character comparison: 0 if this
397	* contains the same characters as `text`, -1 if the characters in
398	* this are bitwise less than the characters in `text`, +1 if the
399	* characters in this are bitwise greater than the characters
400	* in `text`.
401	* @stable ICU 2.0
402	*/
403	inline int8_t compare(int32_t start,
404	int32_t length,
405	const UnicodeString& text) const;
406
407	/**
408	* Compare the characters bitwise in the range
409	* [`start`, `start + length`) with the characters
410	* in `srcText` in the range
411	* [`srcStart`, `srcStart + srcLength`).
412	* @param start the offset at which the compare operation begins
413	* @param length the number of characters in this to compare.
414	* @param srcText the text to be compared
415	* @param srcStart the offset into `srcText` to start comparison
416	* @param srcLength the number of characters in `srcText` to compare
417	* @return The result of bitwise character comparison: 0 if this
418	* contains the same characters as `srcText`, -1 if the characters in
419	* this are bitwise less than the characters in `srcText`, +1 if the
420	* characters in this are bitwise greater than the characters
421	* in `srcText`.
422	* @stable ICU 2.0
423	*/
424	inline int8_t compare(int32_t start,
425	int32_t length,
426	const UnicodeString& srcText,
427	int32_t srcStart,
428	int32_t srcLength) const;
429
430	/**
431	* Compare the characters bitwise in this UnicodeString with the first
432	* `srcLength` characters in `srcChars`.
433	* @param srcChars The characters to compare to this UnicodeString.
434	* @param srcLength the number of characters in `srcChars` to compare
435	* @return The result of bitwise character comparison: 0 if this
436	* contains the same characters as `srcChars`, -1 if the characters in
437	* this are bitwise less than the characters in `srcChars`, +1 if the
438	* characters in this are bitwise greater than the characters
439	* in `srcChars`.
440	* @stable ICU 2.0
441	*/
442	inline int8_t compare(ConstChar16Ptr srcChars,
443	int32_t srcLength) const;
444
445	/**
446	* Compare the characters bitwise in the range
447	* [`start`, `start + length`) with the first
448	* `length` characters in `srcChars`.
449	* @param start the offset at which the compare operation begins
450	* @param length the number of characters to compare.
451	* @param srcChars the characters to be compared
452	* @return The result of bitwise character comparison: 0 if this
453	* contains the same characters as `srcChars`, -1 if the characters in
454	* this are bitwise less than the characters in `srcChars`, +1 if the
455	* characters in this are bitwise greater than the characters
456	* in `srcChars`.
457	* @stable ICU 2.0
458	*/
459	inline int8_t compare(int32_t start,
460	int32_t length,
461	const char16_t *srcChars) const;
462
463	/**
464	* Compare the characters bitwise in the range
465	* [`start`, `start + length`) with the characters
466	* in `srcChars` in the range
467	* [`srcStart`, `srcStart + srcLength`).
468	* @param start the offset at which the compare operation begins
469	* @param length the number of characters in this to compare
470	* @param srcChars the characters to be compared
471	* @param srcStart the offset into `srcChars` to start comparison
472	* @param srcLength the number of characters in `srcChars` to compare
473	* @return The result of bitwise character comparison: 0 if this
474	* contains the same characters as `srcChars`, -1 if the characters in
475	* this are bitwise less than the characters in `srcChars`, +1 if the
476	* characters in this are bitwise greater than the characters
477	* in `srcChars`.
478	* @stable ICU 2.0
479	*/
480	inline int8_t compare(int32_t start,
481	int32_t length,
482	const char16_t *srcChars,
483	int32_t srcStart,
484	int32_t srcLength) const;
485
486	/**
487	* Compare the characters bitwise in the range
488	* [`start`, `limit`) with the characters
489	* in `srcText` in the range
490	* [`srcStart`, `srcLimit`).
491	* @param start the offset at which the compare operation begins
492	* @param limit the offset immediately following the compare operation
493	* @param srcText the text to be compared
494	* @param srcStart the offset into `srcText` to start comparison
495	* @param srcLimit the offset into `srcText` to limit comparison
496	* @return The result of bitwise character comparison: 0 if this
497	* contains the same characters as `srcText`, -1 if the characters in
498	* this are bitwise less than the characters in `srcText`, +1 if the
499	* characters in this are bitwise greater than the characters
500	* in `srcText`.
501	* @stable ICU 2.0
502	*/
503	inline int8_t compareBetween(int32_t start,
504	int32_t limit,
505	const UnicodeString& srcText,
506	int32_t srcStart,
507	int32_t srcLimit) const;
508
509	/**
510	* Compare two Unicode strings in code point order.
511	* The result may be different from the results of compare(), operator<, etc.
512	* if supplementary characters are present:
513	*
514	* In UTF-16, supplementary characters (with code points U+10000 and above) are
515	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
516	* which means that they compare as less than some other BMP characters like U+feff.
517	* This function compares Unicode strings in code point order.
518	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
519	*
520	* @param text Another string to compare this one to.
521	* @return a negative/zero/positive integer corresponding to whether
522	* this string is less than/equal to/greater than the second one
523	* in code point order
524	* @stable ICU 2.0
525	*/
526	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
527
528	/**
529	* Compare two Unicode strings in code point order.
530	* The result may be different from the results of compare(), operator<, etc.
531	* if supplementary characters are present:
532	*
533	* In UTF-16, supplementary characters (with code points U+10000 and above) are
534	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
535	* which means that they compare as less than some other BMP characters like U+feff.
536	* This function compares Unicode strings in code point order.
537	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
538	*
539	* @param start The start offset in this string at which the compare operation begins.
540	* @param length The number of code units from this string to compare.
541	* @param srcText Another string to compare this one to.
542	* @return a negative/zero/positive integer corresponding to whether
543	* this string is less than/equal to/greater than the second one
544	* in code point order
545	* @stable ICU 2.0
546	*/
547	inline int8_t compareCodePointOrder(int32_t start,
548	int32_t length,
549	const UnicodeString& srcText) const;
550
551	/**
552	* Compare two Unicode strings in code point order.
553	* The result may be different from the results of compare(), operator<, etc.
554	* if supplementary characters are present:
555	*
556	* In UTF-16, supplementary characters (with code points U+10000 and above) are
557	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
558	* which means that they compare as less than some other BMP characters like U+feff.
559	* This function compares Unicode strings in code point order.
560	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
561	*
562	* @param start The start offset in this string at which the compare operation begins.
563	* @param length The number of code units from this string to compare.
564	* @param srcText Another string to compare this one to.
565	* @param srcStart The start offset in that string at which the compare operation begins.
566	* @param srcLength The number of code units from that string to compare.
567	* @return a negative/zero/positive integer corresponding to whether
568	* this string is less than/equal to/greater than the second one
569	* in code point order
570	* @stable ICU 2.0
571	*/
572	inline int8_t compareCodePointOrder(int32_t start,
573	int32_t length,
574	const UnicodeString& srcText,
575	int32_t srcStart,
576	int32_t srcLength) const;
577
578	/**
579	* Compare two Unicode strings in code point order.
580	* The result may be different from the results of compare(), operator<, etc.
581	* if supplementary characters are present:
582	*
583	* In UTF-16, supplementary characters (with code points U+10000 and above) are
584	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
585	* which means that they compare as less than some other BMP characters like U+feff.
586	* This function compares Unicode strings in code point order.
587	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
588	*
589	* @param srcChars A pointer to another string to compare this one to.
590	* @param srcLength The number of code units from that string to compare.
591	* @return a negative/zero/positive integer corresponding to whether
592	* this string is less than/equal to/greater than the second one
593	* in code point order
594	* @stable ICU 2.0
595	*/
596	inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
597	int32_t srcLength) const;
598
599	/**
600	* Compare two Unicode strings in code point order.
601	* The result may be different from the results of compare(), operator<, etc.
602	* if supplementary characters are present:
603	*
604	* In UTF-16, supplementary characters (with code points U+10000 and above) are
605	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
606	* which means that they compare as less than some other BMP characters like U+feff.
607	* This function compares Unicode strings in code point order.
608	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
609	*
610	* @param start The start offset in this string at which the compare operation begins.
611	* @param length The number of code units from this string to compare.
612	* @param srcChars A pointer to another string to compare this one to.
613	* @return a negative/zero/positive integer corresponding to whether
614	* this string is less than/equal to/greater than the second one
615	* in code point order
616	* @stable ICU 2.0
617	*/
618	inline int8_t compareCodePointOrder(int32_t start,
619	int32_t length,
620	const char16_t *srcChars) const;
621
622	/**
623	* Compare two Unicode strings in code point order.
624	* The result may be different from the results of compare(), operator<, etc.
625	* if supplementary characters are present:
626	*
627	* In UTF-16, supplementary characters (with code points U+10000 and above) are
628	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
629	* which means that they compare as less than some other BMP characters like U+feff.
630	* This function compares Unicode strings in code point order.
631	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
632	*
633	* @param start The start offset in this string at which the compare operation begins.
634	* @param length The number of code units from this string to compare.
635	* @param srcChars A pointer to another string to compare this one to.
636	* @param srcStart The start offset in that string at which the compare operation begins.
637	* @param srcLength The number of code units from that string to compare.
638	* @return a negative/zero/positive integer corresponding to whether
639	* this string is less than/equal to/greater than the second one
640	* in code point order
641	* @stable ICU 2.0
642	*/
643	inline int8_t compareCodePointOrder(int32_t start,
644	int32_t length,
645	const char16_t *srcChars,
646	int32_t srcStart,
647	int32_t srcLength) const;
648
649	/**
650	* Compare two Unicode strings in code point order.
651	* The result may be different from the results of compare(), operator<, etc.
652	* if supplementary characters are present:
653	*
654	* In UTF-16, supplementary characters (with code points U+10000 and above) are
655	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
656	* which means that they compare as less than some other BMP characters like U+feff.
657	* This function compares Unicode strings in code point order.
658	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
659	*
660	* @param start The start offset in this string at which the compare operation begins.
661	* @param limit The offset after the last code unit from this string to compare.
662	* @param srcText Another string to compare this one to.
663	* @param srcStart The start offset in that string at which the compare operation begins.
664	* @param srcLimit The offset after the last code unit from that string to compare.
665	* @return a negative/zero/positive integer corresponding to whether
666	* this string is less than/equal to/greater than the second one
667	* in code point order
668	* @stable ICU 2.0
669	*/
670	inline int8_t compareCodePointOrderBetween(int32_t start,
671	int32_t limit,
672	const UnicodeString& srcText,
673	int32_t srcStart,
674	int32_t srcLimit) const;
675
676	/**
677	* Compare two strings case-insensitively using full case folding.
678	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
679	*
680	* @param text Another string to compare this one to.
681	* @param options A bit set of options:
682	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
683	* Comparison in code unit order with default case folding.
684	*
685	* - U_COMPARE_CODE_POINT_ORDER
686	* Set to choose code point order instead of code unit order
687	* (see u_strCompare for details).
688	*
689	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
690	*
691	* @return A negative, zero, or positive integer indicating the comparison result.
692	* @stable ICU 2.0
693	*/
694	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
695
696	/**
697	* Compare two strings case-insensitively using full case folding.
698	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
699	*
700	* @param start The start offset in this string at which the compare operation begins.
701	* @param length The number of code units from this string to compare.
702	* @param srcText Another string to compare this one to.
703	* @param options A bit set of options:
704	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
705	* Comparison in code unit order with default case folding.
706	*
707	* - U_COMPARE_CODE_POINT_ORDER
708	* Set to choose code point order instead of code unit order
709	* (see u_strCompare for details).
710	*
711	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
712	*
713	* @return A negative, zero, or positive integer indicating the comparison result.
714	* @stable ICU 2.0
715	*/
716	inline int8_t caseCompare(int32_t start,
717	int32_t length,
718	const UnicodeString& srcText,
719	uint32_t options) const;
720
721	/**
722	* Compare two strings case-insensitively using full case folding.
723	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
724	*
725	* @param start The start offset in this string at which the compare operation begins.
726	* @param length The number of code units from this string to compare.
727	* @param srcText Another string to compare this one to.
728	* @param srcStart The start offset in that string at which the compare operation begins.
729	* @param srcLength The number of code units from that string to compare.
730	* @param options A bit set of options:
731	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
732	* Comparison in code unit order with default case folding.
733	*
734	* - U_COMPARE_CODE_POINT_ORDER
735	* Set to choose code point order instead of code unit order
736	* (see u_strCompare for details).
737	*
738	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
739	*
740	* @return A negative, zero, or positive integer indicating the comparison result.
741	* @stable ICU 2.0
742	*/
743	inline int8_t caseCompare(int32_t start,
744	int32_t length,
745	const UnicodeString& srcText,
746	int32_t srcStart,
747	int32_t srcLength,
748	uint32_t options) const;
749
750	/**
751	* Compare two strings case-insensitively using full case folding.
752	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
753	*
754	* @param srcChars A pointer to another string to compare this one to.
755	* @param srcLength The number of code units from that string to compare.
756	* @param options A bit set of options:
757	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
758	* Comparison in code unit order with default case folding.
759	*
760	* - U_COMPARE_CODE_POINT_ORDER
761	* Set to choose code point order instead of code unit order
762	* (see u_strCompare for details).
763	*
764	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
765	*
766	* @return A negative, zero, or positive integer indicating the comparison result.
767	* @stable ICU 2.0
768	*/
769	inline int8_t caseCompare(ConstChar16Ptr srcChars,
770	int32_t srcLength,
771	uint32_t options) const;
772
773	/**
774	* Compare two strings case-insensitively using full case folding.
775	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
776	*
777	* @param start The start offset in this string at which the compare operation begins.
778	* @param length The number of code units from this string to compare.
779	* @param srcChars A pointer to another string to compare this one to.
780	* @param options A bit set of options:
781	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
782	* Comparison in code unit order with default case folding.
783	*
784	* - U_COMPARE_CODE_POINT_ORDER
785	* Set to choose code point order instead of code unit order
786	* (see u_strCompare for details).
787	*
788	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
789	*
790	* @return A negative, zero, or positive integer indicating the comparison result.
791	* @stable ICU 2.0
792	*/
793	inline int8_t caseCompare(int32_t start,
794	int32_t length,
795	const char16_t *srcChars,
796	uint32_t options) const;
797
798	/**
799	* Compare two strings case-insensitively using full case folding.
800	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
801	*
802	* @param start The start offset in this string at which the compare operation begins.
803	* @param length The number of code units from this string to compare.
804	* @param srcChars A pointer to another string to compare this one to.
805	* @param srcStart The start offset in that string at which the compare operation begins.
806	* @param srcLength The number of code units from that string to compare.
807	* @param options A bit set of options:
808	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
809	* Comparison in code unit order with default case folding.
810	*
811	* - U_COMPARE_CODE_POINT_ORDER
812	* Set to choose code point order instead of code unit order
813	* (see u_strCompare for details).
814	*
815	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
816	*
817	* @return A negative, zero, or positive integer indicating the comparison result.
818	* @stable ICU 2.0
819	*/
820	inline int8_t caseCompare(int32_t start,
821	int32_t length,
822	const char16_t *srcChars,
823	int32_t srcStart,
824	int32_t srcLength,
825	uint32_t options) const;
826
827	/**
828	* Compare two strings case-insensitively using full case folding.
829	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
830	*
831	* @param start The start offset in this string at which the compare operation begins.
832	* @param limit The offset after the last code unit from this string to compare.
833	* @param srcText Another string to compare this one to.
834	* @param srcStart The start offset in that string at which the compare operation begins.
835	* @param srcLimit The offset after the last code unit from that string to compare.
836	* @param options A bit set of options:
837	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
838	* Comparison in code unit order with default case folding.
839	*
840	* - U_COMPARE_CODE_POINT_ORDER
841	* Set to choose code point order instead of code unit order
842	* (see u_strCompare for details).
843	*
844	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
845	*
846	* @return A negative, zero, or positive integer indicating the comparison result.
847	* @stable ICU 2.0
848	*/
849	inline int8_t caseCompareBetween(int32_t start,
850	int32_t limit,
851	const UnicodeString& srcText,
852	int32_t srcStart,
853	int32_t srcLimit,
854	uint32_t options) const;
855
856	/**
857	* Determine if this starts with the characters in `text`
858	* @param text The text to match.
859	* @return true if this starts with the characters in `text`,
860	* false otherwise
861	* @stable ICU 2.0
862	*/
863	inline UBool startsWith(const UnicodeString& text) const;
864
865	/**
866	* Determine if this starts with the characters in `srcText`
867	* in the range [`srcStart`, `srcStart + srcLength`).
868	* @param srcText The text to match.
869	* @param srcStart the offset into `srcText` to start matching
870	* @param srcLength the number of characters in `srcText` to match
871	* @return true if this starts with the characters in the given range of `srcText`,
872	* false otherwise
873	* @stable ICU 2.0
874	*/
875	inline UBool startsWith(const UnicodeString& srcText,
876	int32_t srcStart,
877	int32_t srcLength) const;
878
879	/**
880	* Determine if this starts with the characters in `srcChars`
881	* @param srcChars The characters to match.
882	* @param srcLength the number of characters in `srcChars`
883	* @return true if this starts with the characters in `srcChars`,
884	* false otherwise
885	* @stable ICU 2.0
886	*/
887	inline UBool startsWith(ConstChar16Ptr srcChars,
888	int32_t srcLength) const;
889
890	/**
891	* Determine if this starts with the characters in `srcChars`
892	* in the range [`srcStart`, `srcStart + srcLength`).
893	* @param srcChars The characters to match.
894	* @param srcStart the offset into `srcChars` to start matching
895	* @param srcLength the number of characters in `srcChars` to match
896	* @return true if this starts with the characters in `srcChars`, false otherwise
897	* @stable ICU 2.0
898	*/
899	inline UBool startsWith(const char16_t *srcChars,
900	int32_t srcStart,
901	int32_t srcLength) const;
902
903	/**
904	* Determine if this ends with the characters in `text`
905	* @param text The text to match.
906	* @return true if this ends with the characters in `text`,
907	* false otherwise
908	* @stable ICU 2.0
909	*/
910	inline UBool endsWith(const UnicodeString& text) const;
911
912	/**
913	* Determine if this ends with the characters in `srcText`
914	* in the range [`srcStart`, `srcStart + srcLength`).
915	* @param srcText The text to match.
916	* @param srcStart the offset into `srcText` to start matching
917	* @param srcLength the number of characters in `srcText` to match
918	* @return true if this ends with the characters in the given range of `srcText`,
919	* false otherwise
920	* @stable ICU 2.0
921	*/
922	inline UBool endsWith(const UnicodeString& srcText,
923	int32_t srcStart,
924	int32_t srcLength) const;
925
926	/**
927	* Determine if this ends with the characters in `srcChars`
928	* @param srcChars The characters to match.
929	* @param srcLength the number of characters in `srcChars`
930	* @return true if this ends with the characters in `srcChars`,
931	* false otherwise
932	* @stable ICU 2.0
933	*/
934	inline UBool endsWith(ConstChar16Ptr srcChars,
935	int32_t srcLength) const;
936
937	/**
938	* Determine if this ends with the characters in `srcChars`
939	* in the range [`srcStart`, `srcStart + srcLength`).
940	* @param srcChars The characters to match.
941	* @param srcStart the offset into `srcChars` to start matching
942	* @param srcLength the number of characters in `srcChars` to match
943	* @return true if this ends with the characters in `srcChars`,
944	* false otherwise
945	* @stable ICU 2.0
946	*/
947	inline UBool endsWith(const char16_t *srcChars,
948	int32_t srcStart,
949	int32_t srcLength) const;
950
951
952	/* Searching - bitwise only */
953
954	/**
955	* Locate in this the first occurrence of the characters in `text`,
956	* using bitwise comparison.
957	* @param text The text to search for.
958	* @return The offset into this of the start of `text`,
959	* or -1 if not found.
960	* @stable ICU 2.0
961	*/
962	inline int32_t indexOf(const UnicodeString& text) const;
963
964	/**
965	* Locate in this the first occurrence of the characters in `text`
966	* starting at offset `start`, using bitwise comparison.
967	* @param text The text to search for.
968	* @param start The offset at which searching will start.
969	* @return The offset into this of the start of `text`,
970	* or -1 if not found.
971	* @stable ICU 2.0
972	*/
973	inline int32_t indexOf(const UnicodeString& text,
974	int32_t start) const;
975
976	/**
977	* Locate in this the first occurrence in the range
978	* [`start`, `start + length`) of the characters
979	* in `text`, using bitwise comparison.
980	* @param text The text to search for.
981	* @param start The offset at which searching will start.
982	* @param length The number of characters to search
983	* @return The offset into this of the start of `text`,
984	* or -1 if not found.
985	* @stable ICU 2.0
986	*/
987	inline int32_t indexOf(const UnicodeString& text,
988	int32_t start,
989	int32_t length) const;
990
991	/**
992	* Locate in this the first occurrence in the range
993	* [`start`, `start + length`) of the characters
994	* in `srcText` in the range
995	* [`srcStart`, `srcStart + srcLength`),
996	* using bitwise comparison.
997	* @param srcText The text to search for.
998	* @param srcStart the offset into `srcText` at which
999	* to start matching
1000	* @param srcLength the number of characters in `srcText` to match
1001	* @param start the offset into this at which to start matching
1002	* @param length the number of characters in this to search
1003	* @return The offset into this of the start of `srcText`,
1004	* or -1 if not found.
1005	* @stable ICU 2.0
1006	*/
1007	inline int32_t indexOf(const UnicodeString& srcText,
1008	int32_t srcStart,
1009	int32_t srcLength,
1010	int32_t start,
1011	int32_t length) const;
1012
1013	/**
1014	* Locate in this the first occurrence of the characters in
1015	* `srcChars`
1016	* starting at offset `start`, using bitwise comparison.
1017	* @param srcChars The text to search for.
1018	* @param srcLength the number of characters in `srcChars` to match
1019	* @param start the offset into this at which to start matching
1020	* @return The offset into this of the start of `srcChars`,
1021	* or -1 if not found.
1022	* @stable ICU 2.0
1023	*/
1024	inline int32_t indexOf(const char16_t *srcChars,
1025	int32_t srcLength,
1026	int32_t start) const;
1027
1028	/**
1029	* Locate in this the first occurrence in the range
1030	* [`start`, `start + length`) of the characters
1031	* in `srcChars`, using bitwise comparison.
1032	* @param srcChars The text to search for.
1033	* @param srcLength the number of characters in `srcChars`
1034	* @param start The offset at which searching will start.
1035	* @param length The number of characters to search
1036	* @return The offset into this of the start of `srcChars`,
1037	* or -1 if not found.
1038	* @stable ICU 2.0
1039	*/
1040	inline int32_t indexOf(ConstChar16Ptr srcChars,
1041	int32_t srcLength,
1042	int32_t start,
1043	int32_t length) const;
1044
1045	/**
1046	* Locate in this the first occurrence in the range
1047	* [`start`, `start + length`) of the characters
1048	* in `srcChars` in the range
1049	* [`srcStart`, `srcStart + srcLength`),
1050	* using bitwise comparison.
1051	* @param srcChars The text to search for.
1052	* @param srcStart the offset into `srcChars` at which
1053	* to start matching
1054	* @param srcLength the number of characters in `srcChars` to match
1055	* @param start the offset into this at which to start matching
1056	* @param length the number of characters in this to search
1057	* @return The offset into this of the start of `srcChars`,
1058	* or -1 if not found.
1059	* @stable ICU 2.0
1060	*/
1061	int32_t indexOf(const char16_t *srcChars,
1062	int32_t srcStart,
1063	int32_t srcLength,
1064	int32_t start,
1065	int32_t length) const;
1066
1067	/**
1068	* Locate in this the first occurrence of the BMP code point `c`,
1069	* using bitwise comparison.
1070	* @param c The code unit to search for.
1071	* @return The offset into this of `c`, or -1 if not found.
1072	* @stable ICU 2.0
1073	*/
1074	inline int32_t indexOf(char16_t c) const;
1075
1076	/**
1077	* Locate in this the first occurrence of the code point `c`,
1078	* using bitwise comparison.
1079	*
1080	* @param c The code point to search for.
1081	* @return The offset into this of `c`, or -1 if not found.
1082	* @stable ICU 2.0
1083	*/
1084	inline int32_t indexOf(UChar32 c) const;
1085
1086	/**
1087	* Locate in this the first occurrence of the BMP code point `c`,
1088	* starting at offset `start`, using bitwise comparison.
1089	* @param c The code unit to search for.
1090	* @param start The offset at which searching will start.
1091	* @return The offset into this of `c`, or -1 if not found.
1092	* @stable ICU 2.0
1093	*/
1094	inline int32_t indexOf(char16_t c,
1095	int32_t start) const;
1096
1097	/**
1098	* Locate in this the first occurrence of the code point `c`
1099	* starting at offset `start`, using bitwise comparison.
1100	*
1101	* @param c The code point to search for.
1102	* @param start The offset at which searching will start.
1103	* @return The offset into this of `c`, or -1 if not found.
1104	* @stable ICU 2.0
1105	*/
1106	inline int32_t indexOf(UChar32 c,
1107	int32_t start) const;
1108
1109	/**
1110	* Locate in this the first occurrence of the BMP code point `c`
1111	* in the range [`start`, `start + length`),
1112	* using bitwise comparison.
1113	* @param c The code unit to search for.
1114	* @param start the offset into this at which to start matching
1115	* @param length the number of characters in this to search
1116	* @return The offset into this of `c`, or -1 if not found.
1117	* @stable ICU 2.0
1118	*/
1119	inline int32_t indexOf(char16_t c,
1120	int32_t start,
1121	int32_t length) const;
1122
1123	/**
1124	* Locate in this the first occurrence of the code point `c`
1125	* in the range [`start`, `start + length`),
1126	* using bitwise comparison.
1127	*
1128	* @param c The code point to search for.
1129	* @param start the offset into this at which to start matching
1130	* @param length the number of characters in this to search
1131	* @return The offset into this of `c`, or -1 if not found.
1132	* @stable ICU 2.0
1133	*/
1134	inline int32_t indexOf(UChar32 c,
1135	int32_t start,
1136	int32_t length) const;
1137
1138	/**
1139	* Locate in this the last occurrence of the characters in `text`,
1140	* using bitwise comparison.
1141	* @param text The text to search for.
1142	* @return The offset into this of the start of `text`,
1143	* or -1 if not found.
1144	* @stable ICU 2.0
1145	*/
1146	inline int32_t lastIndexOf(const UnicodeString& text) const;
1147
1148	/**
1149	* Locate in this the last occurrence of the characters in `text`
1150	* starting at offset `start`, using bitwise comparison.
1151	* @param text The text to search for.
1152	* @param start The offset at which searching will start.
1153	* @return The offset into this of the start of `text`,
1154	* or -1 if not found.
1155	* @stable ICU 2.0
1156	*/
1157	inline int32_t lastIndexOf(const UnicodeString& text,
1158	int32_t start) const;
1159
1160	/**
1161	* Locate in this the last occurrence in the range
1162	* [`start`, `start + length`) of the characters
1163	* in `text`, using bitwise comparison.
1164	* @param text The text to search for.
1165	* @param start The offset at which searching will start.
1166	* @param length The number of characters to search
1167	* @return The offset into this of the start of `text`,
1168	* or -1 if not found.
1169	* @stable ICU 2.0
1170	*/
1171	inline int32_t lastIndexOf(const UnicodeString& text,
1172	int32_t start,
1173	int32_t length) const;
1174
1175	/**
1176	* Locate in this the last occurrence in the range
1177	* [`start`, `start + length`) of the characters
1178	* in `srcText` in the range
1179	* [`srcStart`, `srcStart + srcLength`),
1180	* using bitwise comparison.
1181	* @param srcText The text to search for.
1182	* @param srcStart the offset into `srcText` at which
1183	* to start matching
1184	* @param srcLength the number of characters in `srcText` to match
1185	* @param start the offset into this at which to start matching
1186	* @param length the number of characters in this to search
1187	* @return The offset into this of the start of `srcText`,
1188	* or -1 if not found.
1189	* @stable ICU 2.0
1190	*/
1191	inline int32_t lastIndexOf(const UnicodeString& srcText,
1192	int32_t srcStart,
1193	int32_t srcLength,
1194	int32_t start,
1195	int32_t length) const;
1196
1197	/**
1198	* Locate in this the last occurrence of the characters in `srcChars`
1199	* starting at offset `start`, using bitwise comparison.
1200	* @param srcChars The text to search for.
1201	* @param srcLength the number of characters in `srcChars` to match
1202	* @param start the offset into this at which to start matching
1203	* @return The offset into this of the start of `srcChars`,
1204	* or -1 if not found.
1205	* @stable ICU 2.0
1206	*/
1207	inline int32_t lastIndexOf(const char16_t *srcChars,
1208	int32_t srcLength,
1209	int32_t start) const;
1210
1211	/**
1212	* Locate in this the last occurrence in the range
1213	* [`start`, `start + length`) of the characters
1214	* in `srcChars`, using bitwise comparison.
1215	* @param srcChars The text to search for.
1216	* @param srcLength the number of characters in `srcChars`
1217	* @param start The offset at which searching will start.
1218	* @param length The number of characters to search
1219	* @return The offset into this of the start of `srcChars`,
1220	* or -1 if not found.
1221	* @stable ICU 2.0
1222	*/
1223	inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1224	int32_t srcLength,
1225	int32_t start,
1226	int32_t length) const;
1227
1228	/**
1229	* Locate in this the last occurrence in the range
1230	* [`start`, `start + length`) of the characters
1231	* in `srcChars` in the range
1232	* [`srcStart`, `srcStart + srcLength`),
1233	* using bitwise comparison.
1234	* @param srcChars The text to search for.
1235	* @param srcStart the offset into `srcChars` at which
1236	* to start matching
1237	* @param srcLength the number of characters in `srcChars` to match
1238	* @param start the offset into this at which to start matching
1239	* @param length the number of characters in this to search
1240	* @return The offset into this of the start of `srcChars`,
1241	* or -1 if not found.
1242	* @stable ICU 2.0
1243	*/
1244	int32_t lastIndexOf(const char16_t *srcChars,
1245	int32_t srcStart,
1246	int32_t srcLength,
1247	int32_t start,
1248	int32_t length) const;
1249
1250	/**
1251	* Locate in this the last occurrence of the BMP code point `c`,
1252	* using bitwise comparison.
1253	* @param c The code unit to search for.
1254	* @return The offset into this of `c`, or -1 if not found.
1255	* @stable ICU 2.0
1256	*/
1257	inline int32_t lastIndexOf(char16_t c) const;
1258
1259	/**
1260	* Locate in this the last occurrence of the code point `c`,
1261	* using bitwise comparison.
1262	*
1263	* @param c The code point to search for.
1264	* @return The offset into this of `c`, or -1 if not found.
1265	* @stable ICU 2.0
1266	*/
1267	inline int32_t lastIndexOf(UChar32 c) const;
1268
1269	/**
1270	* Locate in this the last occurrence of the BMP code point `c`
1271	* starting at offset `start`, using bitwise comparison.
1272	* @param c The code unit to search for.
1273	* @param start The offset at which searching will start.
1274	* @return The offset into this of `c`, or -1 if not found.
1275	* @stable ICU 2.0
1276	*/
1277	inline int32_t lastIndexOf(char16_t c,
1278	int32_t start) const;
1279
1280	/**
1281	* Locate in this the last occurrence of the code point `c`
1282	* starting at offset `start`, using bitwise comparison.
1283	*
1284	* @param c The code point to search for.
1285	* @param start The offset at which searching will start.
1286	* @return The offset into this of `c`, or -1 if not found.
1287	* @stable ICU 2.0
1288	*/
1289	inline int32_t lastIndexOf(UChar32 c,
1290	int32_t start) const;
1291
1292	/**
1293	* Locate in this the last occurrence of the BMP code point `c`
1294	* in the range [`start`, `start + length`),
1295	* using bitwise comparison.
1296	* @param c The code unit to search for.
1297	* @param start the offset into this at which to start matching
1298	* @param length the number of characters in this to search
1299	* @return The offset into this of `c`, or -1 if not found.
1300	* @stable ICU 2.0
1301	*/
1302	inline int32_t lastIndexOf(char16_t c,
1303	int32_t start,
1304	int32_t length) const;
1305
1306	/**
1307	* Locate in this the last occurrence of the code point `c`
1308	* in the range [`start`, `start + length`),
1309	* using bitwise comparison.
1310	*
1311	* @param c The code point to search for.
1312	* @param start the offset into this at which to start matching
1313	* @param length the number of characters in this to search
1314	* @return The offset into this of `c`, or -1 if not found.
1315	* @stable ICU 2.0
1316	*/
1317	inline int32_t lastIndexOf(UChar32 c,
1318	int32_t start,
1319	int32_t length) const;
1320
1321
1322	/* Character access */
1323
1324	/**
1325	* Return the code unit at offset `offset`.
1326	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1327	* @param offset a valid offset into the text
1328	* @return the code unit at offset `offset`
1329	* or 0xffff if the offset is not valid for this string
1330	* @stable ICU 2.0
1331	*/
1332	inline char16_t charAt(int32_t offset) const;
1333
1334	/**
1335	* Return the code unit at offset `offset`.
1336	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1337	* @param offset a valid offset into the text
1338	* @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string
1339	* @stable ICU 2.0
1340	*/
1341	inline char16_t operator[] (int32_t offset) const;
1342
1343	/**
1344	* Return the code point that contains the code unit
1345	* at offset `offset`.
1346	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1347	* @param offset a valid offset into the text
1348	* that indicates the text offset of any of the code units
1349	* that will be assembled into a code point (21-bit value) and returned
1350	* @return the code point of text at `offset`
1351	* or 0xffff if the offset is not valid for this string
1352	* @stable ICU 2.0
1353	*/
1354	UChar32 char32At(int32_t offset) const;
1355
1356	/**
1357	* Adjust a random-access offset so that
1358	* it points to the beginning of a Unicode character.
1359	* The offset that is passed in points to
1360	* any code unit of a code point,
1361	* while the returned offset will point to the first code unit
1362	* of the same code point.
1363	* In UTF-16, if the input offset points to a second surrogate
1364	* of a surrogate pair, then the returned offset will point
1365	* to the first surrogate.
1366	* @param offset a valid offset into one code point of the text
1367	* @return offset of the first code unit of the same code point
1368	* @see U16_SET_CP_START
1369	* @stable ICU 2.0
1370	*/
1371	int32_t getChar32Start(int32_t offset) const;
1372
1373	/**
1374	* Adjust a random-access offset so that
1375	* it points behind a Unicode character.
1376	* The offset that is passed in points behind
1377	* any code unit of a code point,
1378	* while the returned offset will point behind the last code unit
1379	* of the same code point.
1380	* In UTF-16, if the input offset points behind the first surrogate
1381	* (i.e., to the second surrogate)
1382	* of a surrogate pair, then the returned offset will point
1383	* behind the second surrogate (i.e., to the first code unit after the pair).
1384	* @param offset a valid offset after any code unit of a code point of the text
1385	* @return offset of the first code unit after the same code point
1386	* @see U16_SET_CP_LIMIT
1387	* @stable ICU 2.0
1388	*/
1389	int32_t getChar32Limit(int32_t offset) const;
1390
1391	/**
1392	* Move the code unit index along the string by delta code points.
1393	* Interpret the input index as a code unit-based offset into the string,
1394	* move the index forward or backward by delta code points, and
1395	* return the resulting index.
1396	* The input index should point to the first code unit of a code point,
1397	* if there is more than one.
1398	*
1399	* Both input and output indexes are code unit-based as for all
1400	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
1401	* If delta<0 then the index is moved backward (toward the start of the string).
1402	* If delta>0 then the index is moved forward (toward the end of the string).
1403	*
1404	* This behaves like CharacterIterator::move32(delta, kCurrent).
1405	*
1406	* Behavior for out-of-bounds indexes:
1407	* `moveIndex32` pins the input index to 0..length(), i.e.,
1408	* if the input index<0 then it is pinned to 0;
1409	* if it is index>length() then it is pinned to length().
1410	* Afterwards, the index is moved by `delta` code points
1411	* forward or backward,
1412	* but no further backward than to 0 and no further forward than to length().
1413	* The resulting index return value will be in between 0 and length(), inclusively.
1414	*
1415	* Examples:
1416	* \code
1417	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
1418	* UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
1419	*
1420	* // initial index: position of U+10000
1421	* int32_t index=1;
1422	*
1423	* // the following examples will all result in index==4, position of U+10ffff
1424	*
1425	* // skip 2 code points from some position in the string
1426	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1427	*
1428	* // go to the 3rd code point from the start of s (0-based)
1429	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1430	*
1431	* // go to the next-to-last code point of s
1432	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1433	* \endcode
1434	*
1435	* @param index input code unit index
1436	* @param delta (signed) code point count to move the index forward or backward
1437	* in the string
1438	* @return the resulting code unit index
1439	* @stable ICU 2.0
1440	*/
1441	int32_t moveIndex32(int32_t index, int32_t delta) const;
1442
1443	/* Substring extraction */
1444
1445	/**
1446	* Copy the characters in the range
1447	* [`start`, `start + length`) into the array `dst`,
1448	* beginning at `dstStart`.
1449	* If the string aliases to `dst` itself as an external buffer,
1450	* then extract() will not copy the contents.
1451	*
1452	* @param start offset of first character which will be copied into the array
1453	* @param length the number of characters to extract
1454	* @param dst array in which to copy characters. The length of `dst`
1455	* must be at least (`dstStart + length`).
1456	* @param dstStart the offset in `dst` where the first extracted character
1457	* will be written; defaults to 0
1458	* @stable ICU 2.0
1459	*/
1460	inline void extract(int32_t start,
1461	int32_t length,
1462	Char16Ptr dst,
1463	int32_t dstStart = 0) const;
1464
1465	/**
1466	* Copy the contents of the string into dest.
1467	* This is a convenience function that
1468	* checks if there is enough space in dest,
1469	* extracts the entire string if possible,
1470	* and NUL-terminates dest if possible.
1471	*
1472	* If the string fits into dest but cannot be NUL-terminated
1473	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1474	* If the string itself does not fit into dest
1475	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1476	*
1477	* If the string aliases to `dest` itself as an external buffer,
1478	* then extract() will not copy the contents.
1479	*
1480	* @param dest Destination string buffer.
1481	* @param destCapacity Number of char16_ts available at dest.
1482	* @param errorCode ICU error code.
1483	* @return length()
1484	* @stable ICU 2.0
1485	*/
1486	int32_t
1487	extract(Char16Ptr dest, int32_t destCapacity,
1488	UErrorCode &errorCode) const;
1489
1490	/**
1491	* Copy the characters in the range
1492	* [`start`, `start + length`) into the UnicodeString
1493	* `target`.
1494	* @param start offset of first character which will be copied
1495	* @param length the number of characters to extract
1496	* @param target UnicodeString into which to copy characters.
1497	* @stable ICU 2.0
1498	*/
1499	inline void extract(int32_t start,
1500	int32_t length,
1501	UnicodeString& target) const;
1502
1503	/**
1504	* Copy the characters in the range [`start`, `limit`)
1505	* into the array `dst`, beginning at `dstStart`.
1506	* @param start offset of first character which will be copied into the array
1507	* @param limit offset immediately following the last character to be copied
1508	* @param dst array in which to copy characters. The length of `dst`
1509	* must be at least (`dstStart + (limit - start)`).
1510	* @param dstStart the offset in `dst` where the first extracted character
1511	* will be written; defaults to 0
1512	* @stable ICU 2.0
1513	*/
1514	inline void extractBetween(int32_t start,
1515	int32_t limit,
1516	char16_t *dst,
1517	int32_t dstStart = 0) const;
1518
1519	/**
1520	* Copy the characters in the range [`start`, `limit`)
1521	* into the UnicodeString `target`. Replaceable API.
1522	* @param start offset of first character which will be copied
1523	* @param limit offset immediately following the last character to be copied
1524	* @param target UnicodeString into which to copy characters.
1525	* @stable ICU 2.0
1526	*/
1527	virtual void extractBetween(int32_t start,
1528	int32_t limit,
1529	UnicodeString& target) const override;
1530
1531	/**
1532	* Copy the characters in the range
1533	* [`start`, `start + startLength`) into an array of characters.
1534	* All characters must be invariant (see utypes.h).
1535	* Use US_INV as the last, signature-distinguishing parameter.
1536	*
1537	* This function does not write any more than `targetCapacity`
1538	* characters but returns the length of the entire output string
1539	* so that one can allocate a larger buffer and call the function again
1540	* if necessary.
1541	* The output string is NUL-terminated if possible.
1542	*
1543	* @param start offset of first character which will be copied
1544	* @param startLength the number of characters to extract
1545	* @param target the target buffer for extraction, can be nullptr
1546	* if targetCapacity is 0
1547	* @param targetCapacity the length of the target buffer
1548	* @param inv Signature-distinguishing parameter, use US_INV.
1549	* @return the output string length, not including the terminating NUL
1550	* @stable ICU 3.2
1551	*/
1552	int32_t extract(int32_t start,
1553	int32_t startLength,
1554	char *target,
1555	int32_t targetCapacity,
1556	enum EInvariant inv) const;
1557
1558	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1559
1560	/**
1561	* Copy the characters in the range
1562	* [`start`, `start + startLength`) into an array of characters
1563	* in the platform's default codepage.
1564	* This function does not write any more than `targetLength`
1565	* characters but returns the length of the entire output string
1566	* so that one can allocate a larger buffer and call the function again
1567	* if necessary.
1568	* The output string is NUL-terminated if possible.
1569	*
1570	* @param start offset of first character which will be copied
1571	* @param startLength the number of characters to extract
1572	* @param target the target buffer for extraction
1573	* @param targetLength the length of the target buffer.
1574	* If `target` is nullptr, then the number of bytes required for
1575	* `target` is returned.
1576	* @return the output string length, not including the terminating NUL
1577	* @stable ICU 2.0
1578	*/
1579	int32_t extract(int32_t start,
1580	int32_t startLength,
1581	char *target,
1582	uint32_t targetLength) const;
1583
1584	#endif
1585
1586	#if !UCONFIG_NO_CONVERSION
1587
1588	/**
1589	* Copy the characters in the range
1590	* [`start`, `start + startLength`) into an array of characters
1591	* in a specified codepage.
1592	* The output string is NUL-terminated.
1593	*
1594	* Recommendation: For invariant-character strings use
1595	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1596	* because it avoids object code dependencies of UnicodeString on
1597	* the conversion code.
1598	*
1599	* @param start offset of first character which will be copied
1600	* @param startLength the number of characters to extract
1601	* @param target the target buffer for extraction
1602	* @param codepage the desired codepage for the characters. 0 has
1603	* the special meaning of the default codepage.
1604	* If `codepage` is an empty string (`""`),
1605	* then a simple conversion is performed on the codepage-invariant
1606	* subset ("invariant characters") of the platform encoding. See utypes.h.
1607	* If `target` is nullptr, then the number of bytes required for
1608	* `target` is returned. It is assumed that the target is big enough
1609	* to fit all of the characters.
1610	* @return the output string length, not including the terminating NUL
1611	* @stable ICU 2.0
1612	*/
1613	inline int32_t extract(int32_t start,
1614	int32_t startLength,
1615	char *target,
1616	const char *codepage = 0) const;
1617
1618	/**
1619	* Copy the characters in the range
1620	* [`start`, `start + startLength`) into an array of characters
1621	* in a specified codepage.
1622	* This function does not write any more than `targetLength`
1623	* characters but returns the length of the entire output string
1624	* so that one can allocate a larger buffer and call the function again
1625	* if necessary.
1626	* The output string is NUL-terminated if possible.
1627	*
1628	* Recommendation: For invariant-character strings use
1629	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1630	* because it avoids object code dependencies of UnicodeString on
1631	* the conversion code.
1632	*
1633	* @param start offset of first character which will be copied
1634	* @param startLength the number of characters to extract
1635	* @param target the target buffer for extraction
1636	* @param targetLength the length of the target buffer
1637	* @param codepage the desired codepage for the characters. 0 has
1638	* the special meaning of the default codepage.
1639	* If `codepage` is an empty string (`""`),
1640	* then a simple conversion is performed on the codepage-invariant
1641	* subset ("invariant characters") of the platform encoding. See utypes.h.
1642	* If `target` is nullptr, then the number of bytes required for
1643	* `target` is returned.
1644	* @return the output string length, not including the terminating NUL
1645	* @stable ICU 2.0
1646	*/
1647	int32_t extract(int32_t start,
1648	int32_t startLength,
1649	char *target,
1650	uint32_t targetLength,
1651	const char *codepage) const;
1652
1653	/**
1654	* Convert the UnicodeString into a codepage string using an existing UConverter.
1655	* The output string is NUL-terminated if possible.
1656	*
1657	* This function avoids the overhead of opening and closing a converter if
1658	* multiple strings are extracted.
1659	*
1660	* @param dest destination string buffer, can be nullptr if destCapacity==0
1661	* @param destCapacity the number of chars available at dest
1662	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1663	* or nullptr for the default converter
1664	* @param errorCode normal ICU error code
1665	* @return the length of the output string, not counting the terminating NUL;
1666	* if the length is greater than destCapacity, then the string will not fit
1667	* and a buffer of the indicated length would need to be passed in
1668	* @stable ICU 2.0
1669	*/
1670	int32_t extract(char *dest, int32_t destCapacity,
1671	UConverter *cnv,
1672	UErrorCode &errorCode) const;
1673
1674	#endif
1675
1676	/**
1677	* Create a temporary substring for the specified range.
1678	* Unlike the substring constructor and setTo() functions,
1679	* the object returned here will be a read-only alias (using getBuffer())
1680	* rather than copying the text.
1681	* As a result, this substring operation is much faster but requires
1682	* that the original string not be modified or deleted during the lifetime
1683	* of the returned substring object.
1684	* @param start offset of the first character visible in the substring (default 0)
1685	* @param length length of the substring (default INT32_MAX)
1686	* @return a read-only alias UnicodeString object for the substring
1687	* @stable ICU 4.4
1688	*/
1689	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1690
1691	/**
1692	* Create a temporary substring for the specified range.
1693	* Same as tempSubString(start, length) except that the substring range
1694	* is specified as a (start, limit) pair (with an exclusive limit index)
1695	* rather than a (start, length) pair.
1696	* @param start offset of the first character visible in the substring
1697	* @param limit offset immediately following the last character visible in the substring
1698	* @return a read-only alias UnicodeString object for the substring
1699	* @stable ICU 4.4
1700	*/
1701	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1702
1703	/**
1704	* Convert the UnicodeString to UTF-8 and write the result
1705	* to a ByteSink. This is called by toUTF8String().
1706	* Unpaired surrogates are replaced with U+FFFD.
1707	* Calls u_strToUTF8WithSub().
1708	*
1709	* @param sink A ByteSink to which the UTF-8 version of the string is written.
1710	* sink.Flush() is called at the end.
1711	* @stable ICU 4.2
1712	* @see toUTF8String
1713	*/
1714	void toUTF8(ByteSink &sink) const;
1715
1716	/**
1717	* Append the UTF-8 form of this UnicodeString to a standard string.
1718	* The conversion itself is delegated to toUTF8(), which is handed
1719	* a StringByteSink wrapped around `result`.
1720	* Unpaired surrogates are replaced with U+FFFD.
1721	*
1722	* @param result A standard string (or a compatible object);
1723	* the UTF-8 version of this string is appended to it.
1724	* @return A reference to `result`.
1725	* @stable ICU 4.2
1726	* @see toUTF8
1727	*/
1728	template<typename StringClass>
1729	StringClass &toUTF8String(StringClass &result) const {
1730	StringByteSink<StringClass> utf8Sink(&result, length());
1731	toUTF8(utf8Sink);
1732	return result;
1733	}
1734
1735	/**
1736	* Convert the UnicodeString to UTF-32.
1737	* Unpaired surrogates are replaced with U+FFFD.
1738	* Calls u_strToUTF32WithSub().
1739	*
1740	* @param utf32 destination string buffer, can be nullptr if capacity==0
1741	* @param capacity the number of UChar32s available at utf32
1742	* @param errorCode Standard ICU error code. Its input value must
1743	* pass the U_SUCCESS() test, or else the function returns
1744	* immediately. Check for U_FAILURE() on output or use with
1745	* function chaining. (See User Guide for details.)
1746	* @return The length of the UTF-32 string.
1747	* @see fromUTF32
1748	* @stable ICU 4.2
1749	*/
1750	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1751
1752	/* Length operations */
1753
1754	/**
1755	* Return the length of the UnicodeString object.
1756	* The length is the number of char16_t code units in the UnicodeString.
1757	* If you want the number of code points, please use countChar32().
1758	* @return the length of the UnicodeString object
1759	* @see countChar32
1760	* @stable ICU 2.0
1761	*/
1762	inline int32_t length(void) const;
1763
1764	/**
1765	* Count Unicode code points in the length char16_t code units of the string.
1766	* A code point may occupy either one or two char16_t code units.
1767	* Counting code points involves reading all code units.
1768	*
1769	* This function is basically the inverse of moveIndex32().
1770	*
1771	* @param start the index of the first code unit to check (default 0)
1772	* @param length the number of char16_t code units to check (default INT32_MAX)
1773	* @return the number of code points in the specified code units
1774	* @see length
1775	* @stable ICU 2.0
1776	*/
1777	int32_t
1778	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1779
1780	/**
1781	* Check if the length char16_t code units of the string
1782	* contain more Unicode code points than a certain number.
1783	* This is more efficient than counting all code points in this part of the string
1784	* and comparing that number with a threshold.
1785	* This function may not need to scan the string at all if the length
1786	* falls within a certain range, and
1787	* never needs to count more than 'number+1' code points.
1788	* Logically equivalent to (countChar32(start, length)>number).
1789	* A Unicode code point may occupy either one or two char16_t code units.
1790	*
1791	* @param start the index of the first code unit to check (0 for the entire string)
1792	* @param length the number of char16_t code units to check
1793	* (use INT32_MAX for the entire string; remember that start/length
1794	* values are pinned)
1795	* @param number The number of code points in the (sub)string is compared against
1796	* the 'number' parameter.
1797	* @return Boolean value for whether the string contains more Unicode code points
1798	* than 'number'. Same as (u_countChar32(s, length)>number).
1799	* @see countChar32
1800	* @see u_strHasMoreChar32Than
1801	* @stable ICU 2.4
1802	*/
1803	UBool
1804	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1805
1806	/**
1807	* Determine if this string is empty.
1808	* @return true if this string contains 0 characters, false otherwise.
1809	* @stable ICU 2.0
1810	*/
1811	inline UBool isEmpty(void) const;
1812
1813	/**
1814	* Return the capacity of the internal buffer of the UnicodeString object.
1815	* This is useful together with the getBuffer functions.
1816	* See there for details.
1817	*
1818	* @return the number of char16_ts available in the internal buffer
1819	* @see getBuffer
1820	* @stable ICU 2.0
1821	*/
1822	inline int32_t getCapacity(void) const;
1823
1824	/* Other operations */
1825
1826	/**
1827	* Generate a hash code for this object.
1828	* @return The hash code of this UnicodeString.
1829	* @stable ICU 2.0
1830	*/
1831	inline int32_t hashCode(void) const;
1832
1833	/**
1834	* Determine if this object contains a valid string.
1835	* A bogus string has no value. It is different from an empty string,
1836	* although in both cases isEmpty() returns true and length() returns 0.
1837	* setToBogus() and isBogus() can be used to indicate that no string value is available.
1838	* For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
1839	* length() returns 0.
1840	*
1841	* @return true if the string is bogus/invalid, false otherwise
1842	* @see setToBogus()
1843	* @stable ICU 2.0
1844	*/
1845	inline UBool isBogus(void) const;
1846
1847
1848	//========================================
1849	// Write operations
1850	//========================================
1851
1852	/* Assignment operations */
1853
1854	/**
1855	* Assignment operator. Replace the characters in this UnicodeString
1856	* with the characters from `srcText`.
1857	*
1858	* Starting with ICU 2.4, the assignment operator and the copy constructor
1859	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1860	* By contrast, the fastCopyFrom() function implements the old,
1861	* more efficient but less safe behavior
1862	* of making this string also a readonly alias to the same buffer.
1863	*
1864	* If the source object has an "open" buffer from getBuffer(minCapacity),
1865	* then the copy is an empty string.
1866	*
1867	* @param srcText The text containing the characters to replace
1868	* @return a reference to this
1869	* @stable ICU 2.0
1870	* @see fastCopyFrom
1871	*/
1872	UnicodeString &operator=(const UnicodeString &srcText);
1873
1874	/**
1875	* Almost the same as the assignment operator.
1876	* Replace the characters in this UnicodeString
1877	* with the characters from `srcText`.
1878	*
1879	* This function works the same as the assignment operator
1880	* for all strings except for ones that are readonly aliases.
1881	*
1882	* Starting with ICU 2.4, the assignment operator and the copy constructor
1883	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1884	* This function implements the old, more efficient but less safe behavior
1885	* of making this string also a readonly alias to the same buffer.
1886	*
1887	* The fastCopyFrom function must be used only if it is known that the lifetime of
1888	* this UnicodeString does not exceed the lifetime of the aliased buffer
1889	* including its contents, for example for strings from resource bundles
1890	* or aliases to string constants.
1891	*
1892	* If the source object has an "open" buffer from getBuffer(minCapacity),
1893	* then the copy is an empty string.
1894	*
1895	* @param src The text containing the characters to replace.
1896	* @return a reference to this
1897	* @stable ICU 2.4
1898	*/
1899	UnicodeString &fastCopyFrom(const UnicodeString &src);
1900
1901	/**
1902	* Move assignment operator; might leave src in bogus state.
1903	* This string will have the same contents and state that the source string had.
1904	* The behavior is undefined if *this and src are the same object.
1905	* @param src source string
1906	* @return *this
1907	* @stable ICU 56
1908	*/
1909	UnicodeString &operator=(UnicodeString &&src) noexcept;
1910
1911	/**
1912	* Swap strings.
1913	* @param other other string
1914	* @stable ICU 56
1915	*/
1916	void swap(UnicodeString &other) noexcept;
1917
1918	/**
1919	* Non-member UnicodeString swap function; forwards to the member swap().
1920	* @param s1 will get s2's contents and state
1921	* @param s2 will get s1's contents and state
1922	* @stable ICU 56
1923	*/
1924	friend inline void U_EXPORT2
1925	swap(UnicodeString &s1, UnicodeString &s2) noexcept {
1926	s1.swap(s2);
1927	}
1928
1929	/**
1930	* Assignment operator. Replace the characters in this UnicodeString
1931	* with the code unit `ch`.
1932	* @param ch the code unit to replace
1933	* @return a reference to this
1934	* @stable ICU 2.0
1935	*/
1936	inline UnicodeString& operator= (char16_t ch);
1937
1938	/**
1939	* Assignment operator. Replace the characters in this UnicodeString
1940	* with the code point `ch`.
1941	* @param ch the code point to replace
1942	* @return a reference to this
1943	* @stable ICU 2.0
1944	*/
1945	inline UnicodeString& operator= (UChar32 ch);
1946
1947	/**
1948	* Set the text in the UnicodeString object to the characters
1949	* in `srcText` in the range
1950	* [`srcStart`, `srcText.length()`).
1951	* `srcText` is not modified.
1952	* @param srcText the source for the new characters
1953	* @param srcStart the offset into `srcText` where new characters
1954	* will be obtained
1955	* @return a reference to this
1956	* @stable ICU 2.2
1957	*/
1958	inline UnicodeString& setTo(const UnicodeString& srcText,
1959	int32_t srcStart);
1960
1961	/**
1962	* Set the text in the UnicodeString object to the characters
1963	* in `srcText` in the range
1964	* [`srcStart`, `srcStart + srcLength`).
1965	* `srcText` is not modified.
1966	* @param srcText the source for the new characters
1967	* @param srcStart the offset into `srcText` where new characters
1968	* will be obtained
1969	* @param srcLength the number of characters in `srcText` in the
1970	* replace string.
1971	* @return a reference to this
1972	* @stable ICU 2.0
1973	*/
1974	inline UnicodeString& setTo(const UnicodeString& srcText,
1975	int32_t srcStart,
1976	int32_t srcLength);
1977
1978	/**
1979	* Set the text in the UnicodeString object to the characters in
1980	* `srcText`.
1981	* `srcText` is not modified.
1982	* @param srcText the source for the new characters
1983	* @return a reference to this
1984	* @stable ICU 2.0
1985	*/
1986	inline UnicodeString& setTo(const UnicodeString& srcText);
1987
1988	/**
1989	* Set the characters in the UnicodeString object to the characters
1990	* in `srcChars`. `srcChars` is not modified.
1991	* @param srcChars the source for the new characters
1992	* @param srcLength the number of Unicode characters in srcChars.
1993	* @return a reference to this
1994	* @stable ICU 2.0
1995	*/
1996	inline UnicodeString& setTo(const char16_t *srcChars,
1997	int32_t srcLength);
1998
1999	/**
2000	* Set the characters in the UnicodeString object to the code unit
2001	* `srcChar`.
2002	* @param srcChar the code unit which becomes the UnicodeString's character
2003	* content
2004	* @return a reference to this
2005	* @stable ICU 2.0
2006	*/
2007	inline UnicodeString& setTo(char16_t srcChar);
2008
2009	/**
2010	* Set the characters in the UnicodeString object to the code point
2011	* `srcChar`.
2012	* @param srcChar the code point which becomes the UnicodeString's character
2013	* content
2014	* @return a reference to this
2015	* @stable ICU 2.0
2016	*/
2017	inline UnicodeString& setTo(UChar32 srcChar);
2018
2019	/**
2020	* Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2021	* The text will be used for the UnicodeString object, but
2022	* it will not be released when the UnicodeString is destroyed.
2023	* This has copy-on-write semantics:
2024	* When the string is modified, then the buffer is first copied into
2025	* newly allocated memory.
2026	* The aliased buffer is never modified.
2027	*
2028	* In an assignment to another UnicodeString, when using the copy constructor
2029	* or the assignment operator, the text will be copied.
2030	* When using fastCopyFrom(), the text will be aliased again,
2031	* so that both strings then alias the same readonly-text.
2032	*
2033	* @param isTerminated specifies if `text` is `NUL`-terminated.
2034	* This must be true if `textLength==-1`.
2035	* @param text The characters to alias for the UnicodeString.
2036	* @param textLength The number of Unicode characters in `text` to alias.
2037	* If -1, then this function will determine the length
2038	* by calling `u_strlen()`.
2039	* @return a reference to this
2040	* @stable ICU 2.0
2041	*/
2042	UnicodeString &setTo(UBool isTerminated,
2043	ConstChar16Ptr text,
2044	int32_t textLength);
2045
2046	/**
2047	* Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2048	* The text will be used for the UnicodeString object, but
2049	* it will not be released when the UnicodeString is destroyed.
2050	* This has write-through semantics:
2051	* For as long as the capacity of the buffer is sufficient, write operations
2052	* will directly affect the buffer. When more capacity is necessary, then
2053	* a new buffer will be allocated and the contents copied as with regularly
2054	* constructed strings.
2055	* In an assignment to another UnicodeString, the buffer will be copied.
2056	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2057	* as the string buffer itself and will in this case not copy the contents.
2058	*
2059	* @param buffer The characters to alias for the UnicodeString.
2060	* @param buffLength The number of Unicode characters in `buffer` to alias.
2061	* @param buffCapacity The size of `buffer` in char16_ts.
2062	* @return a reference to this
2063	* @stable ICU 2.0
2064	*/
2065	UnicodeString &setTo(char16_t *buffer,
2066	int32_t buffLength,
2067	int32_t buffCapacity);
2068
2069	/**
2070	* Make this UnicodeString object invalid.
2071	* The string will test true with isBogus().
2072	*
2073	* A bogus string has no value. It is different from an empty string.
2074	* It can be used to indicate that no string value is available.
2075	* getBuffer() and getTerminatedBuffer() return nullptr, and
2076	* length() returns 0.
2077	*
2078	* This utility function is used throughout the UnicodeString
2079	* implementation to indicate that a UnicodeString operation failed,
2080	* and may be used in other functions,
2081	* especially but not exclusively when such functions do not
2082	* take a UErrorCode for simplicity.
2083	*
2084	* The following methods, and no others, will clear a string object's bogus flag:
2085	* - remove()
2086	* - remove(0, INT32_MAX)
2087	* - truncate(0)
2088	* - operator=() (assignment operator)
2089	* - setTo(...)
2090	*
2091	* The simplest way to turn a bogus string into an empty one
2092	* is to use the remove() function.
2093	* Examples for other functions that are equivalent to "set to empty string":
2094	* \code
2095	* if(s.isBogus()) {
2096	* s.remove(); // set to an empty string (remove all), or
2097	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2098	* s.truncate(0); // set to an empty string (complete truncation), or
2099	* s=UnicodeString(); // assign an empty string, or
2100	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2101	* s.setTo(u"", 0); // set to an empty C Unicode string
2102	* }
2103	* \endcode
2104	*
2105	* @see isBogus()
2106	* @stable ICU 2.0
2107	*/
2108	void setToBogus();
2109
2110	/**
2111	* Set the character at the specified offset to the specified character.
2112	* @param offset A valid offset into the text of the character to set
2113	* @param ch The new character
2114	* @return A reference to this
2115	* @stable ICU 2.0
2116	*/
2117	UnicodeString& setCharAt(int32_t offset,
2118	char16_t ch);
2119
2120
2121	/* Append operations */
2122
2123	/**
2124	* Append operator. Append the code unit `ch` to the UnicodeString
2125	* object.
2126	* @param ch the code unit to be appended
2127	* @return a reference to this
2128	* @stable ICU 2.0
2129	*/
2130	inline UnicodeString& operator+= (char16_t ch);
2131
2132	/**
2133	* Append operator. Append the code point `ch` to the UnicodeString
2134	* object.
2135	* @param ch the code point to be appended
2136	* @return a reference to this
2137	* @stable ICU 2.0
2138	*/
2139	inline UnicodeString& operator+= (UChar32 ch);
2140
2141	/**
2142	* Append operator. Append the characters in `srcText` to the
2143	* UnicodeString object. `srcText` is not modified.
2144	* @param srcText the source for the new characters
2145	* @return a reference to this
2146	* @stable ICU 2.0
2147	*/
2148	inline UnicodeString& operator+= (const UnicodeString& srcText);
2149
2150	/**
2151	* Append the characters
2152	* in `srcText` in the range
2153	* [`srcStart`, `srcStart + srcLength`) to the
2154	* end of the UnicodeString object. `srcText`
2155	* is not modified.
2156	* @param srcText the source for the new characters
2157	* @param srcStart the offset into `srcText` where new characters
2158	* will be obtained
2159	* @param srcLength the number of characters in `srcText` in
2160	* the append string
2161	* @return a reference to this
2162	* @stable ICU 2.0
2163	*/
2164	inline UnicodeString& append(const UnicodeString& srcText,
2165	int32_t srcStart,
2166	int32_t srcLength);
2167
2168	/**
2169	* Append the characters in `srcText` to the UnicodeString object.
2170	* `srcText` is not modified.
2171	* @param srcText the source for the new characters
2172	* @return a reference to this
2173	* @stable ICU 2.0
2174	*/
2175	inline UnicodeString& append(const UnicodeString& srcText);
2176
2177	/**
2178	* Append the characters in `srcChars` in the range
2179	* [`srcStart`, `srcStart + srcLength`) to the UnicodeString
2180	* object (at its end).
2181	* `srcChars` is not modified.
2182	* @param srcChars the source for the new characters
2183	* @param srcStart the offset into `srcChars` where new characters
2184	* will be obtained
2185	* @param srcLength the number of characters in `srcChars` in
2186	* the append string; can be -1 if `srcChars` is NUL-terminated
2187	* @return a reference to this
2188	* @stable ICU 2.0
2189	*/
2190	inline UnicodeString& append(const char16_t *srcChars,
2191	int32_t srcStart,
2192	int32_t srcLength);
2193
2194	/**
2195	* Append the characters in `srcChars` to the UnicodeString object
2196	* (at its end). `srcChars` is not modified.
2197	* @param srcChars the source for the new characters
2198	* @param srcLength the number of Unicode characters in `srcChars`;
2199	* can be -1 if `srcChars` is NUL-terminated
2200	* @return a reference to this
2201	* @stable ICU 2.0
2202	*/
2203	inline UnicodeString& append(ConstChar16Ptr srcChars,
2204	int32_t srcLength);
2205
2206	/**
2207	* Append the code unit `srcChar` to the UnicodeString object.
2208	* @param srcChar the code unit to append
2209	* @return a reference to this
2210	* @stable ICU 2.0
2211	*/
2212	inline UnicodeString& append(char16_t srcChar);
2213
2214	/**
2215	* Append the code point `srcChar` to the UnicodeString object.
2216	* @param srcChar the code point to append
2217	* @return a reference to this
2218	* @stable ICU 2.0
2219	*/
2220	UnicodeString& append(UChar32 srcChar);
2221
2222
2223	/* Insert operations */
2224
2225	/**
2226	* Insert the characters in `srcText` in the range
2227	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2228	* object at offset `start`. `srcText` is not modified.
2229	* @param start the offset where the insertion begins
2230	* @param srcText the source for the new characters
2231	* @param srcStart the offset into `srcText` where new characters
2232	* will be obtained
2233	* @param srcLength the number of characters in `srcText` in
2234	* the insert string
2235	* @return a reference to this
2236	* @stable ICU 2.0
2237	*/
2238	inline UnicodeString& insert(int32_t start,
2239	const UnicodeString& srcText,
2240	int32_t srcStart,
2241	int32_t srcLength);
2242
2243	/**
2244	* Insert the characters in `srcText` into the UnicodeString object
2245	* at offset `start`. `srcText` is not modified.
2246	* @param start the offset where the insertion begins
2247	* @param srcText the source for the new characters
2248	* @return a reference to this
2249	* @stable ICU 2.0
2250	*/
2251	inline UnicodeString& insert(int32_t start,
2252	const UnicodeString& srcText);
2253
2254	/**
2255	* Insert the characters in `srcChars` in the range
2256	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2257	* object at offset `start`. `srcChars` is not modified.
2258	* @param start the offset at which the insertion begins
2259	* @param srcChars the source for the new characters
2260	* @param srcStart the offset into `srcChars` where new characters
2261	* will be obtained
2262	* @param srcLength the number of characters in `srcChars`
2263	* in the insert string
2264	* @return a reference to this
2265	* @stable ICU 2.0
2266	*/
2267	inline UnicodeString& insert(int32_t start,
2268	const char16_t *srcChars,
2269	int32_t srcStart,
2270	int32_t srcLength);
2271
2272	/**
2273	* Insert the characters in `srcChars` into the UnicodeString object
2274	* at offset `start`. `srcChars` is not modified.
2275	* @param start the offset where the insertion begins
2276	* @param srcChars the source for the new characters
2277	* @param srcLength the number of Unicode characters in srcChars.
2278	* @return a reference to this
2279	* @stable ICU 2.0
2280	*/
2281	inline UnicodeString& insert(int32_t start,
2282	ConstChar16Ptr srcChars,
2283	int32_t srcLength);
2284
2285	/**
2286	* Insert the code unit `srcChar` into the UnicodeString object at
2287	* offset `start`.
2288	* @param start the offset at which the insertion occurs
2289	* @param srcChar the code unit to insert
2290	* @return a reference to this
2291	* @stable ICU 2.0
2292	*/
2293	inline UnicodeString& insert(int32_t start,
2294	char16_t srcChar);
2295
2296	/**
2297	* Insert the code point `srcChar` into the UnicodeString object at
2298	* offset `start`.
2299	* @param start the offset at which the insertion occurs
2300	* @param srcChar the code point to insert
2301	* @return a reference to this
2302	* @stable ICU 2.0
2303	*/
2304	inline UnicodeString& insert(int32_t start,
2305	UChar32 srcChar);
2306
2307
2308	/* Replace operations */
2309
2310	/**
2311	* Replace the characters in the range
2312	* [`start`, `start + length`) with the characters in
2313	* `srcText` in the range
2314	* [`srcStart`, `srcStart + srcLength`).
2315	* `srcText` is not modified.
2316	* @param start the offset at which the replace operation begins
2317	* @param length the number of characters to replace. The character at
2318	* `start + length` is not modified.
2319	* @param srcText the source for the new characters
2320	* @param srcStart the offset into `srcText` where new characters
2321	* will be obtained
2322	* @param srcLength the number of characters in `srcText` in
2323	* the replace string
2324	* @return a reference to this
2325	* @stable ICU 2.0
2326	*/
2327	inline UnicodeString& replace(int32_t start,
2328	int32_t length,
2329	const UnicodeString& srcText,
2330	int32_t srcStart,
2331	int32_t srcLength);
2332
2333	/**
2334	* Replace the characters in the range
2335	* [`start`, `start + length`)
2336	* with the characters in `srcText`. `srcText` is
2337	* not modified.
2338	* @param start the offset at which the replace operation begins
2339	* @param length the number of characters to replace. The character at
2340	* `start + length` is not modified.
2341	* @param srcText the source for the new characters
2342	* @return a reference to this
2343	* @stable ICU 2.0
2344	*/
2345	inline UnicodeString& replace(int32_t start,
2346	int32_t length,
2347	const UnicodeString& srcText);
2348
2349	/**
2350	* Replace the characters in the range
2351	* [`start`, `start + length`) with the characters in
2352	* `srcChars` in the range
2353	* [`srcStart`, `srcStart + srcLength`). `srcChars`
2354	* is not modified.
2355	* @param start the offset at which the replace operation begins
2356	* @param length the number of characters to replace. The character at
2357	* `start + length` is not modified.
2358	* @param srcChars the source for the new characters
2359	* @param srcStart the offset into `srcChars` where new characters
2360	* will be obtained
2361	* @param srcLength the number of characters in `srcChars`
2362	* in the replace string
2363	* @return a reference to this
2364	* @stable ICU 2.0
2365	*/
2366	inline UnicodeString& replace(int32_t start,
2367	int32_t length,
2368	const char16_t *srcChars,
2369	int32_t srcStart,
2370	int32_t srcLength);
2371
2372	/**
2373	* Replace the characters in the range
2374	* [`start`, `start + length`) with the characters in
2375	* `srcChars`. `srcChars` is not modified.
2376	* @param start the offset at which the replace operation begins
2377	* @param length number of characters to replace. The character at
2378	* `start + length` is not modified.
2379	* @param srcChars the source for the new characters
2380	* @param srcLength the number of Unicode characters in srcChars
2381	* @return a reference to this
2382	* @stable ICU 2.0
2383	*/
2384	inline UnicodeString& replace(int32_t start,
2385	int32_t length,
2386	ConstChar16Ptr srcChars,
2387	int32_t srcLength);
2388
2389	/**
2390	* Replace the characters in the range
2391	* [`start`, `start + length`) with the code unit
2392	* `srcChar`.
2393	* @param start the offset at which the replace operation begins
2394	* @param length the number of characters to replace. The character at
2395	* `start + length` is not modified.
2396	* @param srcChar the new code unit
2397	* @return a reference to this
2398	* @stable ICU 2.0
2399	*/
2400	inline UnicodeString& replace(int32_t start,
2401	int32_t length,
2402	char16_t srcChar);
2403
2404	/**
2405	* Replace the characters in the range
2406	* [`start`, `start + length`) with the code point
2407	* `srcChar`.
2408	* @param start the offset at which the replace operation begins
2409	* @param length the number of characters to replace. The character at
2410	* `start + length` is not modified.
2411	* @param srcChar the new code point
2412	* @return a reference to this
2413	* @stable ICU 2.0
2414	*/
2415	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2416
2417	/**
2418	* Replace the characters in the range [`start`, `limit`)
2419	* with the characters in `srcText`. `srcText` is not modified.
2420	* @param start the offset at which the replace operation begins
2421	* @param limit the offset immediately following the replace range
2422	* @param srcText the source for the new characters
2423	* @return a reference to this
2424	* @stable ICU 2.0
2425	*/
2426	inline UnicodeString& replaceBetween(int32_t start,
2427	int32_t limit,
2428	const UnicodeString& srcText);
2429
2430	/**
2431	* Replace the characters in the range [`start`, `limit`)
2432	* with the characters in `srcText` in the range
2433	* [`srcStart`, `srcLimit`). `srcText` is not modified.
2434	* @param start the offset at which the replace operation begins
2435	* @param limit the offset immediately following the replace range
2436	* @param srcText the source for the new characters
2437	* @param srcStart the offset into `srcText` where new characters
2438	* will be obtained
2439	* @param srcLimit the offset immediately following the range to copy
2440	* in `srcText`
2441	* @return a reference to this
2442	* @stable ICU 2.0
2443	*/
2444	inline UnicodeString& replaceBetween(int32_t start,
2445	int32_t limit,
2446	const UnicodeString& srcText,
2447	int32_t srcStart,
2448	int32_t srcLimit);
2449
2450	/**
2451	* Replace a substring of this object with the given text.
2452	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2453	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2454	* @param text the text to replace characters `start` to `limit - 1`
2455	* @stable ICU 2.0
2456	*/
2457	virtual void handleReplaceBetween(int32_t start,
2458	int32_t limit,
2459	const UnicodeString& text) override;
2460
2461	/**
2462	* Replaceable API
2463	* @return true if it has MetaData
2464	* @stable ICU 2.4
2465	*/
2466	virtual UBool hasMetaData() const override;
2467
2468	/**
2469	* Copy a substring of this object, retaining attribute (out-of-band)
2470	* information. This method is used to duplicate or reorder substrings.
2471	* The destination index must not overlap the source range.
2472	*
2473	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2474	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2475	* @param dest the destination index. The characters from
2476	* `start..limit-1` will be copied to `dest`.
2477	* Implementations of this method may assume that `dest <= start ||
2478	* dest >= limit`.
2479	* @stable ICU 2.0
2480	*/
2481	virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2482
2483	/* Search and replace operations */
2484
2485	/**
2486	* Replace all occurrences of characters in oldText with the characters
2487	* in newText
2488	* @param oldText the text containing the search text
2489	* @param newText the text containing the replacement text
2490	* @return a reference to this
2491	* @stable ICU 2.0
2492	*/
2493	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2494	const UnicodeString& newText);
2495
2496	/**
2497	* Replace all occurrences of characters in oldText with characters
2498	* in newText
2499	* in the range [`start`, `start + length`).
2500	* @param start the start of the range in which replace will be performed
2501	* @param length the length of the range in which replace will be performed
2502	* @param oldText the text containing the search text
2503	* @param newText the text containing the replacement text
2504	* @return a reference to this
2505	* @stable ICU 2.0
2506	*/
2507	inline UnicodeString& findAndReplace(int32_t start,
2508	int32_t length,
2509	const UnicodeString& oldText,
2510	const UnicodeString& newText);
2511
2512	/**
2513	* Replace all occurrences of characters in oldText in the range
2514	* [`oldStart`, `oldStart + oldLength`) with the characters
2515	* in newText in the range
2516	* [`newStart`, `newStart + newLength`)
2517	* in the range [`start`, `start + length`).
2518	* @param start the start of the range in which replace will be performed
2519	* @param length the length of the range in which replace will be performed
2520	* @param oldText the text containing the search text
2521	* @param oldStart the start of the search range in `oldText`
2522	* @param oldLength the length of the search range in `oldText`
2523	* @param newText the text containing the replacement text
2524	* @param newStart the start of the replacement range in `newText`
2525	* @param newLength the length of the replacement range in `newText`
2526	* @return a reference to this
2527	* @stable ICU 2.0
2528	*/
2529	UnicodeString& findAndReplace(int32_t start,
2530	int32_t length,
2531	const UnicodeString& oldText,
2532	int32_t oldStart,
2533	int32_t oldLength,
2534	const UnicodeString& newText,
2535	int32_t newStart,
2536	int32_t newLength);
2537
2538
2539	/* Remove operations */
2540
2541	/**
2542	* Removes all characters from the UnicodeString object and clears the bogus flag.
2543	* This is the UnicodeString equivalent of std::string’s clear().
2544	*
2545	* @return a reference to this
2546	* @see setToBogus
2547	* @stable ICU 2.0
2548	*/
2549	inline UnicodeString& remove();
2550
2551	/**
2552	* Remove the characters in the range
2553	* [`start`, `start + length`) from the UnicodeString object.
2554	* @param start the offset of the first character to remove
2555	* @param length the number of characters to remove
2556	* @return a reference to this
2557	* @stable ICU 2.0
2558	*/
2559	inline UnicodeString& remove(int32_t start,
2560	int32_t length = (int32_t)INT32_MAX);
2561
2562	/**
2563	* Remove the characters in the range
2564	* [`start`, `limit`) from the UnicodeString object.
2565	* @param start the offset of the first character to remove
2566	* @param limit the offset immediately following the range to remove
2567	* @return a reference to this
2568	* @stable ICU 2.0
2569	*/
2570	inline UnicodeString& removeBetween(int32_t start,
2571	int32_t limit = (int32_t)INT32_MAX);
2572
2573	/**
2574	* Retain only the characters in the range
2575	* [`start`, `limit`) from the UnicodeString object.
2576	* Removes characters before `start` and at and after `limit`.
2577	* @param start the offset of the first character to retain
2578	* @param limit the offset immediately following the range to retain
2579	* @return a reference to this
2580	* @stable ICU 4.4
2581	*/
2582	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2583
2584	/* Length operations */
2585
2586	/**
2587	* Pad the start of this UnicodeString with the character `padChar`.
2588	* If the length of this UnicodeString is less than targetLength,
2589	* targetLength - length() copies of padChar will be added to the
2590	* beginning of this UnicodeString.
2591	* @param targetLength the desired length of the string
2592	* @param padChar the character to use for padding. Defaults to
2593	* space (U+0020)
2594	* @return true if the text was padded, false otherwise.
2595	* @stable ICU 2.0
2596	*/
2597	UBool padLeading(int32_t targetLength,
2598	char16_t padChar = 0x0020);
2599
2600	/**
2601	* Pad the end of this UnicodeString with the character `padChar`.
2602	* If the length of this UnicodeString is less than targetLength,
2603	* targetLength - length() copies of padChar will be added to the
2604	* end of this UnicodeString.
2605	* @param targetLength the desired length of the string
2606	* @param padChar the character to use for padding. Defaults to
2607	* space (U+0020)
2608	* @return true if the text was padded, false otherwise.
2609	* @stable ICU 2.0
2610	*/
2611	UBool padTrailing(int32_t targetLength,
2612	char16_t padChar = 0x0020);
2613
2614	/**
2615	* Truncate this UnicodeString to the `targetLength`.
2616	* @param targetLength the desired length of this UnicodeString.
2617	* @return true if the text was truncated, false otherwise
2618	* @stable ICU 2.0
2619	*/
2620	inline UBool truncate(int32_t targetLength);
2621
2622	/**
2623	* Trims leading and trailing whitespace from this UnicodeString.
2624	* @return a reference to this
2625	* @stable ICU 2.0
2626	*/
2627	UnicodeString& trim(void);
2628
2629
2630	/* Miscellaneous operations */
2631
2632	/**
2633	* Reverse this UnicodeString in place.
2634	* @return a reference to this
2635	* @stable ICU 2.0
2636	*/
2637	inline UnicodeString& reverse(void);
2638
2639	/**
2640	* Reverse the range [`start`, `start + length`) in
2641	* this UnicodeString.
2642	* @param start the start of the range to reverse
2643	* @param length the number of characters to reverse
2644	* @return a reference to this
2645	* @stable ICU 2.0
2646	*/
2647	inline UnicodeString& reverse(int32_t start,
2648	int32_t length);
2649
2650	/**
2651	* Convert the characters in this to UPPER CASE following the conventions of
2652	* the default locale.
2653	* @return A reference to this.
2654	* @stable ICU 2.0
2655	*/
2656	UnicodeString& toUpper(void);
2657
2658	/**
2659	* Convert the characters in this to UPPER CASE following the conventions of
2660	* a specific locale.
2661	* @param locale The locale containing the conventions to use.
2662	* @return A reference to this.
2663	* @stable ICU 2.0
2664	*/
2665	UnicodeString& toUpper(const Locale& locale);
2666
2667	/**
2668	* Convert the characters in this to lower case following the conventions of
2669	* the default locale.
2670	* @return A reference to this.
2671	* @stable ICU 2.0
2672	*/
2673	UnicodeString& toLower(void);
2674
2675	/**
2676	* Convert the characters in this to lower case following the conventions of
2677	* a specific locale.
2678	* @param locale The locale containing the conventions to use.
2679	* @return A reference to this.
2680	* @stable ICU 2.0
2681	*/
2682	UnicodeString& toLower(const Locale& locale);
2683
2684	#if !UCONFIG_NO_BREAK_ITERATION
2685
2686	/**
2687	* Titlecase this string, convenience function using the default locale.
2688	*
2689	* Casing is locale-dependent and context-sensitive.
2690	* Titlecasing uses a break iterator to find the first characters of words
2691	* that are to be titlecased. It titlecases those characters and lowercases
2692	* all others.
2693	*
2694	* The titlecase break iterator can be provided to customize for arbitrary
2695	* styles, using rules and dictionaries beyond the standard iterators.
2696	* It may be more efficient to always provide an iterator to avoid
2697	* opening and closing one for each string.
2698	* The standard titlecase iterator for the root locale implements the
2699	* algorithm of Unicode TR 21.
2700	*
2701	* This function uses only the setText(), first() and next() methods of the
2702	* provided break iterator.
2703	*
2704	* @param titleIter A break iterator to find the first characters of words
2705	* that are to be titlecased.
2706	* If none is provided (0), then a standard titlecase
2707	* break iterator is opened.
2708	* Otherwise the provided iterator is set to the string's text.
2709	* @return A reference to this.
2710	* @stable ICU 2.1
2711	*/
2712	UnicodeString &toTitle(BreakIterator *titleIter);
2713
2714	/**
2715	* Titlecase this string.
2716	*
2717	* Casing is locale-dependent and context-sensitive.
2718	* Titlecasing uses a break iterator to find the first characters of words
2719	* that are to be titlecased. It titlecases those characters and lowercases
2720	* all others.
2721	*
2722	* The titlecase break iterator can be provided to customize for arbitrary
2723	* styles, using rules and dictionaries beyond the standard iterators.
2724	* It may be more efficient to always provide an iterator to avoid
2725	* opening and closing one for each string.
2726	* The standard titlecase iterator for the root locale implements the
2727	* algorithm of Unicode TR 21.
2728	*
2729	* This function uses only the setText(), first() and next() methods of the
2730	* provided break iterator.
2731	*
2732	* @param titleIter A break iterator to find the first characters of words
2733	* that are to be titlecased.
2734	* If none is provided (0), then a standard titlecase
2735	* break iterator is opened.
2736	* Otherwise the provided iterator is set to the string's text.
2737	* @param locale The locale to consider.
2738	* @return A reference to this.
2739	* @stable ICU 2.1
2740	*/
2741	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2742
2743	/**
2744	* Titlecase this string, with options.
2745	*
2746	* Casing is locale-dependent and context-sensitive.
2747	* Titlecasing uses a break iterator to find the first characters of words
2748	* that are to be titlecased. It titlecases those characters and lowercases
2749	* all others. (This can be modified with options.)
2750	*
2751	* The titlecase break iterator can be provided to customize for arbitrary
2752	* styles, using rules and dictionaries beyond the standard iterators.
2753	* It may be more efficient to always provide an iterator to avoid
2754	* opening and closing one for each string.
2755	* The standard titlecase iterator for the root locale implements the
2756	* algorithm of Unicode TR 21.
2757	*
2758	* This function uses only the setText(), first() and next() methods of the
2759	* provided break iterator.
2760	*
2761	* @param titleIter A break iterator to find the first characters of words
2762	* that are to be titlecased.
2763	* If none is provided (0), then a standard titlecase
2764	* break iterator is opened.
2765	* Otherwise the provided iterator is set to the string's text.
2766	* @param locale The locale to consider.
2767	* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2768	* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2769	* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2770	* @return A reference to this.
2771	* @stable ICU 3.8
2772	*/
2773	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2774
2775	#endif
2776
2777	/**
2778	* Case-folds the characters in this string.
2779	*
2780	* Case-folding is locale-independent and not context-sensitive,
2781	* but there is an option for whether to include or exclude mappings for dotted I
2782	* and dotless i that are marked with 'T' in CaseFolding.txt.
2783	*
2784	* The result may be longer or shorter than the original.
2785	*
2786	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2787	* @return A reference to this.
2788	* @stable ICU 2.0
2789	*/
2790	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2791
2792	//========================================
2793	// Access to the internal buffer
2794	//========================================
2795
2796	/**
2797	* Get a read/write pointer to the internal buffer.
2798	* The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2799	* writable, and is still owned by the UnicodeString object.
2800	* Calls to getBuffer(minCapacity) must not be nested, and
2801	* must be matched with calls to releaseBuffer(newLength).
2802	* If the string buffer was read-only or shared,
2803	* then it will be reallocated and copied.
2804	*
2805	* An attempted nested call will return 0, and will not further modify the
2806	* state of the UnicodeString object.
2807	* It also returns 0 if the string is bogus.
2808	*
2809	* The actual capacity of the string buffer may be larger than minCapacity.
2810	* getCapacity() returns the actual capacity.
2811	* For many operations, the full capacity should be used to avoid reallocations.
2812	*
2813	* While the buffer is "open" between getBuffer(minCapacity)
2814	* and releaseBuffer(newLength), the following applies:
2815	* - The string length is set to 0.
2816	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
2817	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
2818	* - You can read from and write to the returned buffer.
2819	* - The previous string contents will still be in the buffer;
2820	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
2821	* If the length() was greater than minCapacity, then any contents after minCapacity
2822	* may be lost.
2823	* The buffer contents is not NUL-terminated by getBuffer().
2824	* If length() < getCapacity() then you can terminate it by writing a NUL
2825	* at index length().
2826	* - You must call releaseBuffer(newLength) in order to
2827	* return to normal UnicodeString operation.
2828	*
2829	* @param minCapacity the minimum number of char16_ts that are to be available
2830	* in the buffer, starting at the returned pointer;
2831	* default to the current string capacity if minCapacity==-1
2832	* @return a writable pointer to the internal string buffer,
2833	* or nullptr if an error occurs (nested calls, out of memory)
2834	*
2835	* @see releaseBuffer
2836	* @see getTerminatedBuffer()
2837	* @stable ICU 2.0
2838	*/
2839	char16_t *getBuffer(int32_t minCapacity);
2840
2841	/**
2842	* Release a read/write buffer on a UnicodeString object with an
2843	* "open" getBuffer(minCapacity).
2844	* This function must be called in a matched pair with getBuffer(minCapacity).
2845	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2846	*
2847	* It will set the string length to newLength, at most to the current capacity.
2848	* If newLength==-1 then it will set the length according to the
2849	* first NUL in the buffer, or to the capacity if there is no NUL.
2850	*
2851	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2852	*
2853	* @param newLength the new length of the UnicodeString object;
2854	* defaults to the current capacity if newLength is greater than that;
2855	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
2856	* the current capacity of the string
2857	*
2858	* @see getBuffer(int32_t minCapacity)
2859	* @stable ICU 2.0
2860	*/
2861	void releaseBuffer(int32_t newLength=-1);
2862
2863	/**
2864	* Get a read-only pointer to the internal buffer.
2865	* This can be called at any time on a valid UnicodeString.
2866	*
2867	* It returns 0 if the string is bogus, or
2868	* during an "open" getBuffer(minCapacity).
2869	*
2870	* It can be called as many times as desired.
2871	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2872	* at which time the pointer is semantically invalidated and must not be used any more.
2873	*
2874	* The capacity of the buffer can be determined with getCapacity().
2875	* The part after length() may or may not be initialized and valid,
2876	* depending on the history of the UnicodeString object.
2877	*
2878	* The buffer contents is (probably) not NUL-terminated.
2879	* You can check if it is with
2880	* `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
2881	* (See getTerminatedBuffer().)
2882	*
2883	* The buffer may reside in read-only memory. Its contents must not
2884	* be modified.
2885	*
2886	* @return a read-only pointer to the internal string buffer,
2887	* or nullptr if the string is empty or bogus
2888	*
2889	* @see getBuffer(int32_t minCapacity)
2890	* @see getTerminatedBuffer()
2891	* @stable ICU 2.0
2892	*/
2893	inline const char16_t *getBuffer() const;
2894
2895	/**
2896	* Get a read-only pointer to the internal buffer,
2897	* making sure that it is NUL-terminated.
2898	* This can be called at any time on a valid UnicodeString.
2899	*
2900	* It returns 0 if the string is bogus, or
2901	* during an "open" getBuffer(minCapacity), or if the buffer cannot
2902	* be NUL-terminated (because memory allocation failed).
2903	*
2904	* It can be called as many times as desired.
2905	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2906	* at which time the pointer is semantically invalidated and must not be used any more.
2907	*
2908	* The capacity of the buffer can be determined with getCapacity().
2909	* The part after length()+1 may or may not be initialized and valid,
2910	* depending on the history of the UnicodeString object.
2911	*
2912	* The buffer contents is guaranteed to be NUL-terminated.
2913	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2914	* is written.
2915	* For this reason, this function is not const, unlike getBuffer().
2916	* Note that a UnicodeString may also contain NUL characters as part of its contents.
2917	*
2918	* The buffer may reside in read-only memory. Its contents must not
2919	* be modified.
2920	*
2921	* @return a read-only pointer to the internal string buffer,
2922	* or 0 if the string is empty or bogus
2923	*
2924	* @see getBuffer(int32_t minCapacity)
2925	* @see getBuffer()
2926	* @stable ICU 2.2
2927	*/
2928	const char16_t *getTerminatedBuffer();
2929
2930	//========================================
2931	// Constructors
2932	//========================================
2933
2934	/** Construct an empty UnicodeString.
2935	* @stable ICU 2.0
2936	*/
2937	inline UnicodeString();
2938
2939	/**
2940	* Construct a UnicodeString with capacity to hold `capacity` char16_ts
2941	* @param capacity the number of char16_ts this UnicodeString should hold
2942	* before a resize is necessary; if count is greater than 0 and count
2943	* code points c take up more space than capacity, then capacity is adjusted
2944	* accordingly.
2945	* @param c is used to initially fill the string
2946	* @param count specifies how many code points c are to be written in the
2947	* string
2948	* @stable ICU 2.0
2949	*/
2950	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2951
2952	/**
2953	* Single char16_t (code unit) constructor.
2954	*
2955	* It is recommended to mark this constructor "explicit" by
2956	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2957	* on the compiler command line or similar.
2958	* @param ch the character to place in the UnicodeString
2959	* @stable ICU 2.0
2960	*/
2961	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2962
2963	/**
2964	* Single UChar32 (code point) constructor.
2965	*
2966	* It is recommended to mark this constructor "explicit" by
2967	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2968	* on the compiler command line or similar.
2969	* @param ch the character to place in the UnicodeString
2970	* @stable ICU 2.0
2971	*/
2972	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2973
2974	/**
2975	* char16_t* constructor.
2976	*
2977	* It is recommended to mark this constructor "explicit" by
2978	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2979	* on the compiler command line or similar.
2980	* @param text The characters to place in the UnicodeString. `text`
2981	* must be NUL (U+0000) terminated.
2982	* @stable ICU 2.0
2983	*/
2984	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
2985
2986	#if !U_CHAR16_IS_TYPEDEF
2987	/**
2988	* uint16_t * constructor.
2989	* Delegates to UnicodeString(const char16_t *).
2990	*
2991	* It is recommended to mark this constructor "explicit" by
2992	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2993	* on the compiler command line or similar.
2994	* @param text NUL-terminated UTF-16 string
2995	* @stable ICU 59
2996	*/
2997	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
2998	UnicodeString (ConstChar16Ptr (text)) {}
2999	#endif
3000
3001	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3002	/**
3003	* wchar_t * constructor.
3004	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3005	* Delegates to UnicodeString(const char16_t *).
3006	*
3007	* It is recommended to mark this constructor "explicit" by
3008	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3009	* on the compiler command line or similar.
3010	* @param text NUL-terminated UTF-16 string
3011	* @stable ICU 59
3012	*/
3013	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3014	UnicodeString(ConstChar16Ptr(text)) {}
3015	#endif
3016
3017	/**
3018	* nullptr_t constructor.
3019	* Effectively the same as the default constructor, makes an empty string object.
3020	*
3021	* It is recommended to mark this constructor "explicit" by
3022	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3023	* on the compiler command line or similar.
3024	* @param text nullptr
3025	* @stable ICU 59
3026	*/
3027	UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3028
3029	/**
3030	* char16_t* constructor.
3031	* @param text The characters to place in the UnicodeString.
3032	* @param textLength The number of Unicode characters in `text`
3033	* to copy.
3034	* @stable ICU 2.0
3035	*/
3036	UnicodeString(const char16_t *text,
3037	int32_t textLength);
3038
3039	#if !U_CHAR16_IS_TYPEDEF
3040	/**
3041	* uint16_t * constructor.
3042	* Delegates to UnicodeString(const char16_t *, int32_t).
3043	* @param text UTF-16 string
3044	* @param textLength string length
3045	* @stable ICU 59
3046	*/
3047	UnicodeString(const uint16_t *text, int32_t textLength) :
3048	UnicodeString (ConstChar16Ptr (text), textLength) {}
3049	#endif
3050
3051	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3052	/**
3053	* wchar_t * constructor.
3054	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3055	* Delegates to UnicodeString(const char16_t *, int32_t).
3056	* @param text NUL-terminated UTF-16 string
3057	* @param textLength string length
3058	* @stable ICU 59
3059	*/
3060	UnicodeString(const wchar_t *text, int32_t textLength) :
3061	UnicodeString(ConstChar16Ptr(text), textLength) {}
3062	#endif
3063
3064	/**
3065	* nullptr_t constructor.
3066	* Effectively the same as the default constructor, makes an empty string object.
3067	* @param text nullptr
3068	* @param textLength ignored
3069	* @stable ICU 59
3070	*/
3071	inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3072
3073	/**
3074	* Readonly-aliasing char16_t* constructor.
3075	* The text will be used for the UnicodeString object, but
3076	* it will not be released when the UnicodeString is destroyed.
3077	* This has copy-on-write semantics:
3078	* When the string is modified, then the buffer is first copied into
3079	* newly allocated memory.
3080	* The aliased buffer is never modified.
3081	*
3082	* In an assignment to another UnicodeString, when using the copy constructor
3083	* or the assignment operator, the text will be copied.
3084	* When using fastCopyFrom(), the text will be aliased again,
3085	* so that both strings then alias the same readonly-text.
3086	*
3087	* @param isTerminated specifies if `text` is `NUL`-terminated.
3088	* This must be true if `textLength==-1`.
3089	* @param text The characters to alias for the UnicodeString.
3090	* @param textLength The number of Unicode characters in `text` to alias.
3091	* If -1, then this constructor will determine the length
3092	* by calling `u_strlen()`.
3093	* @stable ICU 2.0
3094	*/
3095	UnicodeString(UBool isTerminated,
3096	ConstChar16Ptr text,
3097	int32_t textLength);
3098
3099	/**
3100	* Writable-aliasing char16_t* constructor.
3101	* The text will be used for the UnicodeString object, but
3102	* it will not be released when the UnicodeString is destroyed.
3103	* This has write-through semantics:
3104	* For as long as the capacity of the buffer is sufficient, write operations
3105	* will directly affect the buffer. When more capacity is necessary, then
3106	* a new buffer will be allocated and the contents copied as with regularly
3107	* constructed strings.
3108	* In an assignment to another UnicodeString, the buffer will be copied.
3109	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3110	* as the string buffer itself and will in this case not copy the contents.
3111	*
3112	* @param buffer The characters to alias for the UnicodeString.
3113	* @param buffLength The number of Unicode characters in `buffer` to alias.
3114	* @param buffCapacity The size of `buffer` in char16_ts.
3115	* @stable ICU 2.0
3116	*/
3117	UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3118
3119	#if !U_CHAR16_IS_TYPEDEF
3120	/**
3121	* Writable-aliasing uint16_t * constructor.
3122	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3123	* @param buffer writable buffer of/for UTF-16 text
3124	* @param buffLength length of the current buffer contents
3125	* @param buffCapacity buffer capacity
3126	* @stable ICU 59
3127	*/
3128	UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3129	UnicodeString (Char16Ptr (buffer), buffLength, buffCapacity) {}
3130	#endif
3131
3132	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3133	/**
3134	* Writable-aliasing wchar_t * constructor.
3135	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3136	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3137	* @param buffer writable buffer of/for UTF-16 text
3138	* @param buffLength length of the current buffer contents
3139	* @param buffCapacity buffer capacity
3140	* @stable ICU 59
3141	*/
3142	UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3143	UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3144	#endif
3145
3146	/**
3147	* Writable-aliasing nullptr_t constructor.
3148	* Effectively the same as the default constructor, makes an empty string object.
3149	* @param buffer nullptr
3150	* @param buffLength ignored
3151	* @param buffCapacity ignored
3152	* @stable ICU 59
3153	*/
3154	inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3155
3156	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3157
3158	/**
3159	* char* constructor.
3160	* Uses the default converter (and thus depends on the ICU conversion code)
3161	* unless U_CHARSET_IS_UTF8 is set to 1.
3162	*
3163	* For ASCII (really "invariant character") strings it is more efficient to use
3164	* the constructor that takes a US_INV (for its enum EInvariant).
3165	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
3166	* UNICODE_STRING_SIMPLE.
3167	*
3168	* It is recommended to mark this constructor "explicit" by
3169	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3170	* on the compiler command line or similar.
3171	* @param codepageData an array of bytes, null-terminated,
3172	* in the platform's default codepage.
3173	* @stable ICU 2.0
3174	* @see UNICODE_STRING
3175	* @see UNICODE_STRING_SIMPLE
3176	*/
3177	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3178
3179	/**
3180	* char* constructor.
3181	* Uses the default converter (and thus depends on the ICU conversion code)
3182	* unless U_CHARSET_IS_UTF8 is set to 1.
3183	* @param codepageData an array of bytes in the platform's default codepage.
3184	* @param dataLength The number of bytes in `codepageData`.
3185	* @stable ICU 2.0
3186	*/
3187	UnicodeString(const char *codepageData, int32_t dataLength);
3188
3189	#endif
3190
3191	#if !UCONFIG_NO_CONVERSION
3192
3193	/**
3194	* char* constructor.
3195	* @param codepageData an array of bytes, null-terminated
3196	* @param codepage the encoding of `codepageData`. The special
3197	* value 0 for `codepage` indicates that the text is in the
3198	* platform's default codepage.
3199	*
3200	* If `codepage` is an empty string (`""`),
3201	* then a simple conversion is performed on the codepage-invariant
3202	* subset ("invariant characters") of the platform encoding. See utypes.h.
3203	* Recommendation: For invariant-character strings use the constructor
3204	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3205	* because it avoids object code dependencies of UnicodeString on
3206	* the conversion code.
3207	*
3208	* @stable ICU 2.0
3209	*/
3210	UnicodeString(const char codepageData, const* char *codepage);
3211
3212	/**
3213	* char* constructor.
3214	* @param codepageData an array of bytes.
3215	* @param dataLength The number of bytes in `codepageData`.
3216	* @param codepage the encoding of `codepageData`. The special
3217	* value 0 for `codepage` indicates that the text is in the
3218	* platform's default codepage.
3219	* If `codepage` is an empty string (`""`),
3220	* then a simple conversion is performed on the codepage-invariant
3221	* subset ("invariant characters") of the platform encoding. See utypes.h.
3222	* Recommendation: For invariant-character strings use the constructor
3223	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3224	* because it avoids object code dependencies of UnicodeString on
3225	* the conversion code.
3226	*
3227	* @stable ICU 2.0
3228	*/
3229	UnicodeString(const char codepageData, int32_t dataLength, const* char *codepage);
3230
3231	/**
3232	* char * / UConverter constructor.
3233	* This constructor uses an existing UConverter object to
3234	* convert the codepage string to Unicode and construct a UnicodeString
3235	* from that.
3236	*
3237	* The converter is reset at first.
3238	* If the error code indicates a failure before this constructor is called,
3239	* or if an error occurs during conversion or construction,
3240	* then the string will be bogus.
3241	*
3242	* This function avoids the overhead of opening and closing a converter if
3243	* multiple strings are constructed.
3244	*
3245	* @param src input codepage string
3246	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
3247	* @param cnv converter object (ucnv_resetToUnicode() will be called),
3248	* can be nullptr for the default converter
3249	* @param errorCode normal ICU error code
3250	* @stable ICU 2.0
3251	*/
3252	UnicodeString(
3253	const char *src, int32_t srcLength,
3254	UConverter *cnv,
3255	UErrorCode &errorCode);
3256
3257	#endif
3258
3259	/**
3260	* Constructs a Unicode string from an invariant-character char * string.
3261	* About invariant characters see utypes.h.
3262	* This constructor has no runtime dependency on conversion code and is
3263	* therefore recommended over ones taking a charset name string
3264	* (where the empty string "" indicates invariant-character conversion).
3265	*
3266	* Use the macro US_INV as the third, signature-distinguishing parameter.
3267	*
3268	* For example:
3269	* \code
3270	* void fn(const char *s) {
3271	* UnicodeString ustr(s, -1, US_INV);
3272	* // use ustr ...
3273	* }
3274	* \endcode
3275	* @param src String using only invariant characters.
3276	* @param textLength Length of src, or -1 if NUL-terminated.
3277	* @param inv Signature-distinguishing parameter, use US_INV.
3278	*
3279	* @see US_INV
3280	* @stable ICU 3.2
3281	*/
3282	UnicodeString(const char src, int32_t textLength, enum* EInvariant inv);
3283
3284
3285	/**
3286	* Copy constructor.
3287	*
3288	* Starting with ICU 2.4, the assignment operator and the copy constructor
3289	* allocate a new buffer and copy the buffer contents even for readonly aliases.
3290	* By contrast, the fastCopyFrom() function implements the old,
3291	* more efficient but less safe behavior
3292	* of making this string also a readonly alias to the same buffer.
3293	*
3294	* If the source object has an "open" buffer from getBuffer(minCapacity),
3295	* then the copy is an empty string.
3296	*
3297	* @param that The UnicodeString object to copy.
3298	* @stable ICU 2.0
3299	* @see fastCopyFrom
3300	*/
3301	UnicodeString(const UnicodeString& that);
3302
3303	/**
3304	* Move constructor; might leave src in bogus state.
3305	* This string will have the same contents and state that the source string had.
3306	* @param src source string
3307	* @stable ICU 56
3308	*/
3309	UnicodeString(UnicodeString &&src) noexcept;
3310
3311	/**
3312	* 'Substring' constructor from tail of source string.
3313	* @param src The UnicodeString object to copy.
3314	* @param srcStart The offset into `src` at which to start copying.
3315	* @stable ICU 2.2
3316	*/
3317	UnicodeString(const UnicodeString& src, int32_t srcStart);
3318
3319	/**
3320	* 'Substring' constructor from subrange of source string.
3321	* @param src The UnicodeString object to copy.
3322	* @param srcStart The offset into `src` at which to start copying.
3323	* @param srcLength The number of characters from `src` to copy.
3324	* @stable ICU 2.2
3325	*/
3326	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3327
3328	/**
3329	* Clone this object, an instance of a subclass of Replaceable.
3330	* Clones can be used concurrently in multiple threads.
3331	* If a subclass does not implement clone(), or if an error occurs,
3332	* then nullptr is returned.
3333	* The caller must delete the clone.
3334	*
3335	* @return a clone of this object
3336	*
3337	* @see Replaceable::clone
3338	* @see getDynamicClassID
3339	* @stable ICU 2.6
3340	*/
3341	virtual UnicodeString clone() const* override;
3342
3343	/* Destructor.*
3344	* @stable ICU 2.0
3345	*/
3346	virtual ~UnicodeString();
3347
3348	/**
3349	* Create a UnicodeString from a UTF-8 string.
3350	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3351	* Calls u_strFromUTF8WithSub().
3352	*
3353	* @param utf8 UTF-8 input string.
3354	* Note that a StringPiece can be implicitly constructed
3355	* from a std::string or a NUL-terminated const char * string.
3356	* @return A UnicodeString with equivalent UTF-16 contents.
3357	* @see toUTF8
3358	* @see toUTF8String
3359	* @stable ICU 4.2
3360	*/
3361	static UnicodeString fromUTF8(StringPiece utf8);
3362
3363	/**
3364	* Create a UnicodeString from a UTF-32 string.
3365	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3366	* Calls u_strFromUTF32WithSub().
3367	*
3368	* @param utf32 UTF-32 input string. Must not be nullptr.
3369	* @param length Length of the input string, or -1 if NUL-terminated.
3370	* @return A UnicodeString with equivalent UTF-16 contents.
3371	* @see toUTF32
3372	* @stable ICU 4.2
3373	*/
3374	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3375
/* Miscellaneous operations */
3377
3378	/**
3379	* Unescape a string of characters and return a string containing
3380	* the result. The following escape sequences are recognized:
3381	*
3382	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3383	* \\Uhhhhhhhh 8 hex digits
3384	* \\xhh 1-2 hex digits
3385	* \\ooo 1-3 octal digits; o in [0-7]
3386	* \\cX control-X; X is masked with 0x1F
3387	*
3388	* as well as the standard ANSI C escapes:
3389	*
3390	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3391	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3392	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3393	*
3394	* Anything else following a backslash is generically escaped. For
3395	* example, "[a\\-z]" returns "[a-z]".
3396	*
3397	* If an escape sequence is ill-formed, this method returns an empty
3398	* string. An example of an ill-formed sequence is "\\u" followed by
3399	* fewer than 4 hex digits.
3400	*
3401	* This function is similar to u_unescape() but not identical to it.
3402	* The latter takes a source char*, so it does escape recognition
3403	* and also invariant conversion.
3404	*
3405	* @return a string with backslash escapes interpreted, or an
3406	* empty string on error.
3407	* @see UnicodeString#unescapeAt()
3408	* @see u_unescape()
3409	* @see u_unescapeAt()
3410	* @stable ICU 2.0
3411	*/
3412	UnicodeString unescape() const;
3413
3414	/**
3415	* Unescape a single escape sequence and return the represented
3416	* character. See unescape() for a listing of the recognized escape
3417	* sequences. The character at offset-1 is assumed (without
3418	* checking) to be a backslash. If the escape sequence is
3419	* ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3420	* returned.
3421	*
3422	* @param offset an input output parameter. On input, it is the
3423	* offset into this string where the escape sequence is located,
3424	* after the initial backslash. On output, it is advanced after the
3425	* last character parsed. On error, it is not advanced at all.
3426	* @return the character represented by the escape sequence at
3427	* offset, or U_SENTINEL=-1 on error.
3428	* @see UnicodeString#unescape()
3429	* @see u_unescape()
3430	* @see u_unescapeAt()
3431	* @stable ICU 2.0
3432	*/
3433	UChar32 unescapeAt(int32_t &offset) const;
3434
3435	/**
3436	* ICU "poor man's RTTI", returns a UClassID for this class.
3437	*
3438	* @stable ICU 2.2
3439	*/
3440	static UClassID U_EXPORT2 getStaticClassID();
3441
3442	/**
3443	* ICU "poor man's RTTI", returns a UClassID for the actual class.
3444	*
3445	* @stable ICU 2.2
3446	*/
3447	virtual UClassID getDynamicClassID() const override;
3448
3449	//========================================
3450	// Implementation methods
3451	//========================================
3452
3453	protected:
3454	/**
3455	* Implement Replaceable::getLength() (see jitterbug 1027).
3456	* @stable ICU 2.4
3457	*/
3458	virtual int32_t getLength() const override;
3459
3460	/**
3461	* The change in Replaceable to use virtual getCharAt() allows
3462	* UnicodeString::charAt() to be inline again (see jitterbug 709).
3463	* @stable ICU 2.4
3464	*/
3465	virtual char16_t getCharAt(int32_t offset) const override;
3466
3467	/**
3468	* The change in Replaceable to use virtual getChar32At() allows
3469	* UnicodeString::char32At() to be inline again (see jitterbug 709).
3470	* @stable ICU 2.4
3471	*/
3472	virtual UChar32 getChar32At(int32_t offset) const override;
3473
3474	private:
3475	// For char constructors. Could be made public.*
3476	UnicodeString &setToUTF8(StringPiece utf8);
3477	// For extract(char).*
3478	// We could make a toUTF8(target, capacity, errorCode) public but not
3479	// this version: New API will be cleaner if we make callers create substrings
3480	// rather than having start+length on every method,
3481	// and it should take a UErrorCode&.
3482	int32_t
3483	toUTF8(int32_t start, int32_t len,
3484	char target, int32_t capacity) const*;
3485
3486	/**
3487	* Internal string contents comparison, called by operator==.
3488	* Requires: this & text not bogus and have same lengths.
3489	*/
3490	UBool doEquals(const UnicodeString &text, int32_t len) const;
3491
3492	inline UBool
3493	doEqualsSubstring(int32_t start,
3494	int32_t length,
3495	const UnicodeString& srcText,
3496	int32_t srcStart,
3497	int32_t srcLength) const;
3498
3499	UBool doEqualsSubstring(int32_t start,
3500	int32_t length,
3501	const char16_t *srcChars,
3502	int32_t srcStart,
3503	int32_t srcLength) const;
3504
3505	inline int8_t
3506	doCompare(int32_t start,
3507	int32_t length,
3508	const UnicodeString& srcText,
3509	int32_t srcStart,
3510	int32_t srcLength) const;
3511
3512	int8_t doCompare(int32_t start,
3513	int32_t length,
3514	const char16_t *srcChars,
3515	int32_t srcStart,
3516	int32_t srcLength) const;
3517
3518	inline int8_t
3519	doCompareCodePointOrder(int32_t start,
3520	int32_t length,
3521	const UnicodeString& srcText,
3522	int32_t srcStart,
3523	int32_t srcLength) const;
3524
3525	int8_t doCompareCodePointOrder(int32_t start,
3526	int32_t length,
3527	const char16_t *srcChars,
3528	int32_t srcStart,
3529	int32_t srcLength) const;
3530
3531	inline int8_t
3532	doCaseCompare(int32_t start,
3533	int32_t length,
3534	const UnicodeString &srcText,
3535	int32_t srcStart,
3536	int32_t srcLength,
3537	uint32_t options) const;
3538
3539	int8_t
3540	doCaseCompare(int32_t start,
3541	int32_t length,
3542	const char16_t *srcChars,
3543	int32_t srcStart,
3544	int32_t srcLength,
3545	uint32_t options) const;
3546
3547	int32_t doIndexOf(char16_t c,
3548	int32_t start,
3549	int32_t length) const;
3550
3551	int32_t doIndexOf(UChar32 c,
3552	int32_t start,
3553	int32_t length) const;
3554
3555	int32_t doLastIndexOf(char16_t c,
3556	int32_t start,
3557	int32_t length) const;
3558
3559	int32_t doLastIndexOf(UChar32 c,
3560	int32_t start,
3561	int32_t length) const;
3562
3563	void doExtract(int32_t start,
3564	int32_t length,
3565	char16_t *dst,
3566	int32_t dstStart) const;
3567
3568	inline void doExtract(int32_t start,
3569	int32_t length,
3570	UnicodeString& target) const;
3571
3572	inline char16_t doCharAt(int32_t offset) const;
3573
3574	UnicodeString& doReplace(int32_t start,
3575	int32_t length,
3576	const UnicodeString& srcText,
3577	int32_t srcStart,
3578	int32_t srcLength);
3579
3580	UnicodeString& doReplace(int32_t start,
3581	int32_t length,
3582	const char16_t *srcChars,
3583	int32_t srcStart,
3584	int32_t srcLength);
3585
3586	UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3587	UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3588
3589	UnicodeString& doReverse(int32_t start,
3590	int32_t length);
3591
3592	// calculate hash code
3593	int32_t doHashCode(void) const;
3594
3595	// get pointer to start of array
3596	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3597	inline char16_t* getArrayStart(void);
3598	inline const char16_t* getArrayStart(void) const;
3599
3600	inline UBool hasShortLength() const;
3601	inline int32_t getShortLength() const;
3602
3603	// A UnicodeString object (not necessarily its current buffer)
3604	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3605	inline UBool isWritable() const;
3606
3607	// Is the current buffer writable?
3608	inline UBool isBufferWritable() const;
3609
3610	// None of the following does releaseArray().
3611	inline void setZeroLength();
3612	inline void setShortLength(int32_t len);
3613	inline void setLength(int32_t len);
3614	inline void setToEmpty();
inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3616
3617	// allocate the array; result may be the stack buffer
3618	// sets refCount to 1 if appropriate
3619	// sets fArray, fCapacity, and flags
3620	// sets length to 0
3621	// returns boolean for success or failure
3622	UBool allocate(int32_t capacity);
3623
3624	// release the array if owned
3625	void releaseArray(void);
3626
3627	// turn a bogus string into an empty one
3628	void unBogus();
3629
3630	// implements assignment operator, copy constructor, and fastCopyFrom()
3631	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3632
3633	// Copies just the fields without memory management.
3634	void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3635
3636	// Pin start and limit to acceptable values.
3637	inline void pinIndex(int32_t& start) const;
3638	inline void pinIndices(int32_t& start,
3639	int32_t& length) const;
3640
3641	#if !UCONFIG_NO_CONVERSION
3642
3643	/ Internal extract() using UConverter. /
3644	int32_t doExtract(int32_t start, int32_t length,
3645	char *dest, int32_t destCapacity,
3646	UConverter *cnv,
3647	UErrorCode &errorCode) const;
3648
3649	/*
3650	* Real constructor for converting from codepage data.
3651	* It assumes that it is called with !fRefCounted.
3652	*
3653	* If `codepage==0`, then the default converter
3654	* is used for the platform encoding.
3655	* If `codepage` is an empty string (`""`),
3656	* then a simple conversion is performed on the codepage-invariant
3657	* subset ("invariant characters") of the platform encoding. See utypes.h.
3658	*/
3659	void doCodepageCreate(const char *codepageData,
3660	int32_t dataLength,
3661	const char *codepage);
3662
3663	/*
3664	* Worker function for creating a UnicodeString from
3665	* a codepage string using a UConverter.
3666	*/
3667	void
3668	doCodepageCreate(const char *codepageData,
3669	int32_t dataLength,
3670	UConverter *converter,
3671	UErrorCode &status);
3672
3673	#endif
3674
3675	/*
3676	* This function is called when write access to the array
3677	* is necessary.
3678	*
3679	* We need to make a copy of the array if
3680	* the buffer is read-only, or
3681	* the buffer is refCounted (shared), and refCount>1, or
3682	* the buffer is too small.
3683	*
3684	* Return false if memory could not be allocated.
3685	*/
3686	UBool cloneArrayIfNeeded(int32_t newCapacity = -`1`,
3687	int32_t growCapacity = -`1`,
3688	UBool doCopyArray = true,
3689	int32_t **pBufferToDelete = `0`,
3690	UBool forceClone = false);
3691
3692	/**
3693	* Common function for UnicodeString case mappings.
3694	* The stringCaseMapper has the same type UStringCaseMapper
3695	* as in ustr_imp.h for ustrcase_map().
3696	*/
3697	UnicodeString &
3698	caseMap(int32_t caseLocale, uint32_t options,
3699	#if !UCONFIG_NO_BREAK_ITERATION
3700	BreakIterator *iter,
3701	#endif
3702	UStringCaseMapper *stringCaseMapper);
3703
3704	// ref counting
3705	void addRef(void);
3706	int32_t removeRef(void);
3707	int32_t refCount(void) const;
3708
3709	// constants
3710	enum {
3711	/**
3712	* Size of stack buffer for short strings.
3713	* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3714	* @see UNISTR_OBJECT_SIZE
3715	*/
3716	US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-`2`)/U_SIZEOF_UCHAR,
3717	kInvalidUChar=`0xffff`, // U+FFFF returned by charAt(invalid index)
3718	kInvalidHashCode=`0`, // invalid hash code
3719	kEmptyHashCode=`1`, // hash code for empty string
3720
3721	// bit flag values for fLengthAndFlags
3722	kIsBogus=`1`, // this string is bogus, i.e., not valid or nullptr
3723	kUsingStackBuffer=`2`,// using fUnion.fStackFields instead of fUnion.fFields
3724	kRefCounted=`4`, // there is a refCount field before the characters in fArray
3725	kBufferIsReadonly=`8`,// do not write to this buffer
3726	kOpenGetBuffer=`16`, // getBuffer(minCapacity) was called (is "open"),
3727	// and releaseBuffer(newLength) must be called
3728	kAllStorageFlags=`0x1f`,
3729
3730	kLengthShift=`5`, // remaining 11 bits for non-negative short length, or negative if long
3731	kLength1=`1`<<kLengthShift,
3732	kMaxShortLength=`0x3ff`, // max non-negative short length (leaves top bit 0)
3733	kLengthIsLarge=`0xffe0`, // short length < 0, real length is in fUnion.fFields.fLength
3734
3735	// combined values for convenience
3736	kShortString=kUsingStackBuffer,
3737	kLongString=kRefCounted,
3738	kReadonlyAlias=kBufferIsReadonly,
3739	kWritableAlias=`0`
3740	};
3741
3742	friend class UnicodeStringAppendable;
3743
3744	union StackBufferOrFields; // forward declaration necessary before friend declaration
3745	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3746
3747	/*
3748	* The following are all the class fields that are stored
3749	* in each UnicodeString object.
3750	* Note that UnicodeString has virtual functions,
3751	* therefore there is an implicit vtable pointer
3752	* as the first real field.
3753	* The fields should be aligned such that no padding is necessary.
3754	* On 32-bit machines, the size should be 32 bytes,
3755	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
3756	*
3757	* We use a hack to achieve this.
3758	*
3759	* With at least some compilers, each of the following is forced to
3760	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3761	* rounded up with additional padding if the fields do not already fit that requirement:
3762	* - sizeof(class UnicodeString)
3763	* - offsetof(UnicodeString, fUnion)
3764	* - sizeof(fUnion)
3765	* - sizeof(fStackFields)
3766	*
3767	* We optimize for the longest possible internal buffer for short strings.
3768	* fUnion.fStackFields begins with 2 bytes for storage flags
3769	* and the length of relatively short strings,
3770	* followed by the buffer for short string contents.
3771	* There is no padding inside fStackFields.
3772	*
3773	* Heap-allocated and aliased strings use fUnion.fFields.
3774	* Both fStackFields and fFields must begin with the same fields for flags and short length,
3775	* that is, those must have the same memory offsets inside the object,
3776	* because the flags must be inspected in order to decide which half of fUnion is being used.
3777	* We assume that the compiler does not reorder the fields.
3778	*
3779	* (Padding at the end of fFields is ok:
3780	* As long as it is no larger than fStackFields, it is not wasted space.)
3781	*
3782	* For some of the history of the UnicodeString class fields layout, see
3783	* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3784	* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3785	* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3786	*/
3787	// (implicit) vtable;*
3788	union StackBufferOrFields {
3789	// fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3790	// Each struct of the union must begin with fLengthAndFlags.
3791	struct {
3792	int16_t fLengthAndFlags; // bit fields: see constants above
3793	char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3794	} fStackFields;
3795	struct {
3796	int16_t fLengthAndFlags; // bit fields: see constants above
3797	int32_t fLength; // number of characters in fArray if >127; else undefined
3798	int32_t fCapacity; // capacity of fArray (in char16_ts)
3799	// array pointer last to minimize padding for machines with P128 data model
3800	// or pointer sizes that are not a power of 2
3801	char16_t fArray; // the Unicode data*
3802	} fFields;
3803	} fUnion;
3804	};
3805
3806	/**
3807	* Create a new UnicodeString with the concatenation of two others.
3808	*
3809	* @param s1 The first string to be copied to the new one.
3810	* @param s2 The second string to be copied to the new one, after s1.
3811	* @return UnicodeString(s1).append(s2)
3812	* @stable ICU 2.8
3813	*/
3814	U_COMMON_API UnicodeString U_EXPORT2
3815	operator+ (const UnicodeString &s1, const UnicodeString &s2);
3816
3817	//========================================
3818	// Inline members
3819	//========================================
3820
3821	//========================================
3822	// Privates
3823	//========================================
3824
3825	inline void
3826	UnicodeString::pinIndex(int32_t& start) const
3827	{
3828	// pin index
3829	if(start < `0`) {
3830	start = `0`;
3831	} else if(start > length()) {
3832	start = length();
3833	}
3834	}
3835
3836	inline void
3837	UnicodeString::pinIndices(int32_t& start,
3838	int32_t& _length) const
3839	{
3840	// pin indices
3841	int32_t len = length();
3842	if(start < `0`) {
3843	start = `0`;
3844	} else if(start > len) {
3845	start = len;
3846	}
3847	if(_length < `0`) {
3848	_length = `0`;
3849	} else if(_length > (len - start)) {
3850	_length = (len - start);
3851	}
3852	}
3853
3854	inline char16_t*
3855	UnicodeString::getArrayStart() {
3856	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3857	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3858	}
3859
3860	inline const char16_t*
3861	UnicodeString::getArrayStart() const {
3862	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3863	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3864	}
3865
3866	//========================================
3867	// Default constructor
3868	//========================================
3869
3870	inline
3871	UnicodeString::UnicodeString() {
3872	fUnion.fStackFields.fLengthAndFlags=kShortString;
3873	}
3874
3875	inline UnicodeString::UnicodeString(const std::nullptr_t /text/) {
3876	fUnion.fStackFields.fLengthAndFlags=kShortString;
3877	}
3878
3879	inline UnicodeString::UnicodeString(const std::nullptr_t /text/, int32_t /length/) {
3880	fUnion.fStackFields.fLengthAndFlags=kShortString;
3881	}
3882
3883	inline UnicodeString::UnicodeString(std::nullptr_t /buffer/, int32_t /buffLength/, int32_t /buffCapacity/) {
3884	fUnion.fStackFields.fLengthAndFlags=kShortString;
3885	}
3886
3887	//========================================
3888	// Read-only implementation methods
3889	//========================================
3890	inline UBool
3891	UnicodeString::hasShortLength() const {
3892	return fUnion.fFields.fLengthAndFlags>=`0`;
3893	}
3894
3895	inline int32_t
3896	UnicodeString::getShortLength() const {
3897	// fLengthAndFlags must be non-negative -> short length >= 0
3898	// and arithmetic or logical shift does not matter.
3899	return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3900	}
3901
3902	inline int32_t
3903	UnicodeString::length() const {
3904	return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3905	}
3906
3907	inline int32_t
3908	UnicodeString::getCapacity() const {
3909	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3910	US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3911	}
3912
3913	inline int32_t
3914	UnicodeString::hashCode() const
3915	{ return doHashCode(); }
3916
3917	inline UBool
3918	UnicodeString::isBogus() const
3919	{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3920
3921	inline UBool
3922	UnicodeString::isWritable() const
3923	{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus)); }
3924
3925	inline UBool
3926	UnicodeString::isBufferWritable() const
3927	{
3928	return (UBool)(
3929	!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
3930	(!(fUnion.fFields.fLengthAndFlags&kRefCounted) \|\| refCount()==`1`));
3931	}
3932
3933	inline const char16_t *
3934	UnicodeString::getBuffer() const {
3935	if(fUnion.fFields.fLengthAndFlags&(kIsBogus\|kOpenGetBuffer)) {
3936	return nullptr;
3937	} else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3938	return fUnion.fStackFields.fBuffer;
3939	} else {
3940	return fUnion.fFields.fArray;
3941	}
3942	}
3943
3944	//========================================
3945	// Read-only alias methods
3946	//========================================
3947	inline int8_t
3948	UnicodeString::doCompare(int32_t start,
3949	int32_t thisLength,
3950	const UnicodeString& srcText,
3951	int32_t srcStart,
3952	int32_t srcLength) const
3953	{
3954	if(srcText.isBogus()) {
3955	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3956	} else {
3957	srcText.pinIndices(start&: srcStart, length&: srcLength);
3958	return doCompare(start, length: thisLength, srcChars: srcText.getArrayStart(), srcStart, srcLength);
3959	}
3960	}
3961
3962	inline UBool
3963	UnicodeString::doEqualsSubstring(int32_t start,
3964	int32_t thisLength,
3965	const UnicodeString& srcText,
3966	int32_t srcStart,
3967	int32_t srcLength) const
3968	{
3969	if(srcText.isBogus()) {
3970	return isBogus();
3971	} else {
3972	srcText.pinIndices(start&: srcStart, length&: srcLength);
3973	return !isBogus() && doEqualsSubstring(start, length: thisLength, srcChars: srcText.getArrayStart(), srcStart, srcLength);
3974	}
3975	}
3976
3977	inline bool
3978	UnicodeString::operator== (const UnicodeString& text) const
3979	{
3980	if(isBogus()) {
3981	return text.isBogus();
3982	} else {
3983	int32_t len = length(), textLength = text.length();
3984	return !text.isBogus() && len == textLength && doEquals(text, len);
3985	}
3986	}
3987
3988	inline bool
3989	UnicodeString::operator!= (const UnicodeString& text) const
3990	{ return (! operator==(text)); }
3991
3992	inline UBool
3993	UnicodeString::operator> (const UnicodeString& text) const
3994	{ return doCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()) == `1`; }
3995
3996	inline UBool
3997	UnicodeString::operator< (const UnicodeString& text) const
3998	{ return doCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()) == -`1`; }
3999
4000	inline UBool
4001	UnicodeString::operator>= (const UnicodeString& text) const
4002	{ return doCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()) != -`1`; }
4003
4004	inline UBool
4005	UnicodeString::operator<= (const UnicodeString& text) const
4006	{ return doCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()) != `1`; }
4007
4008	inline int8_t
4009	UnicodeString::compare(const UnicodeString& text) const
4010	{ return doCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()); }
4011
4012	inline int8_t
4013	UnicodeString::compare(int32_t start,
4014	int32_t _length,
4015	const UnicodeString& srcText) const
4016	{ return doCompare(start, thisLength: _length, srcText, srcStart: `0`, srcLength: srcText.length()); }
4017
4018	inline int8_t
4019	UnicodeString::compare(ConstChar16Ptr srcChars,
4020	int32_t srcLength) const
4021	{ return doCompare(start: `0`, length: length(), srcChars, srcStart: `0`, srcLength); }
4022
4023	inline int8_t
4024	UnicodeString::compare(int32_t start,
4025	int32_t _length,
4026	const UnicodeString& srcText,
4027	int32_t srcStart,
4028	int32_t srcLength) const
4029	{ return doCompare(start, thisLength: _length, srcText, srcStart, srcLength); }
4030
4031	inline int8_t
4032	UnicodeString::compare(int32_t start,
4033	int32_t _length,
4034	const char16_t srcChars) const*
4035	{ return doCompare(start, length: _length, srcChars, srcStart: `0`, srcLength: _length); }
4036
4037	inline int8_t
4038	UnicodeString::compare(int32_t start,
4039	int32_t _length,
4040	const char16_t *srcChars,
4041	int32_t srcStart,
4042	int32_t srcLength) const
4043	{ return doCompare(start, length: _length, srcChars, srcStart, srcLength); }
4044
4045	inline int8_t
4046	UnicodeString::compareBetween(int32_t start,
4047	int32_t limit,
4048	const UnicodeString& srcText,
4049	int32_t srcStart,
4050	int32_t srcLimit) const
4051	{ return doCompare(start, thisLength: limit - start,
4052	srcText, srcStart, srcLength: srcLimit - srcStart); }
4053
4054	inline int8_t
4055	UnicodeString::doCompareCodePointOrder(int32_t start,
4056	int32_t thisLength,
4057	const UnicodeString& srcText,
4058	int32_t srcStart,
4059	int32_t srcLength) const
4060	{
4061	if(srcText.isBogus()) {
4062	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4063	} else {
4064	srcText.pinIndices(start&: srcStart, length&: srcLength);
4065	return doCompareCodePointOrder(start, length: thisLength, srcChars: srcText.getArrayStart(), srcStart, srcLength);
4066	}
4067	}
4068
4069	inline int8_t
4070	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4071	{ return doCompareCodePointOrder(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length()); }
4072
4073	inline int8_t
4074	UnicodeString::compareCodePointOrder(int32_t start,
4075	int32_t _length,
4076	const UnicodeString& srcText) const
4077	{ return doCompareCodePointOrder(start, thisLength: _length, srcText, srcStart: `0`, srcLength: srcText.length()); }
4078
4079	inline int8_t
4080	UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4081	int32_t srcLength) const
4082	{ return doCompareCodePointOrder(start: `0`, length: length(), srcChars, srcStart: `0`, srcLength); }
4083
4084	inline int8_t
4085	UnicodeString::compareCodePointOrder(int32_t start,
4086	int32_t _length,
4087	const UnicodeString& srcText,
4088	int32_t srcStart,
4089	int32_t srcLength) const
4090	{ return doCompareCodePointOrder(start, thisLength: _length, srcText, srcStart, srcLength); }
4091
4092	inline int8_t
4093	UnicodeString::compareCodePointOrder(int32_t start,
4094	int32_t _length,
4095	const char16_t srcChars) const*
4096	{ return doCompareCodePointOrder(start, length: _length, srcChars, srcStart: `0`, srcLength: _length); }
4097
4098	inline int8_t
4099	UnicodeString::compareCodePointOrder(int32_t start,
4100	int32_t _length,
4101	const char16_t *srcChars,
4102	int32_t srcStart,
4103	int32_t srcLength) const
4104	{ return doCompareCodePointOrder(start, length: _length, srcChars, srcStart, srcLength); }
4105
4106	inline int8_t
4107	UnicodeString::compareCodePointOrderBetween(int32_t start,
4108	int32_t limit,
4109	const UnicodeString& srcText,
4110	int32_t srcStart,
4111	int32_t srcLimit) const
4112	{ return doCompareCodePointOrder(start, thisLength: limit - start,
4113	srcText, srcStart, srcLength: srcLimit - srcStart); }
4114
4115	inline int8_t
4116	UnicodeString::doCaseCompare(int32_t start,
4117	int32_t thisLength,
4118	const UnicodeString &srcText,
4119	int32_t srcStart,
4120	int32_t srcLength,
4121	uint32_t options) const
4122	{
4123	if(srcText.isBogus()) {
4124	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4125	} else {
4126	srcText.pinIndices(start&: srcStart, length&: srcLength);
4127	return doCaseCompare(start, length: thisLength, srcChars: srcText.getArrayStart(), srcStart, srcLength, options);
4128	}
4129	}
4130
4131	inline int8_t
4132	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4133	return doCaseCompare(start: `0`, thisLength: length(), srcText: text, srcStart: `0`, srcLength: text.length(), options);
4134	}
4135
4136	inline int8_t
4137	UnicodeString::caseCompare(int32_t start,
4138	int32_t _length,
4139	const UnicodeString &srcText,
4140	uint32_t options) const {
4141	return doCaseCompare(start, thisLength: _length, srcText, srcStart: `0`, srcLength: srcText.length(), options);
4142	}
4143
4144	inline int8_t
4145	UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4146	int32_t srcLength,
4147	uint32_t options) const {
4148	return doCaseCompare(start: `0`, length: length(), srcChars, srcStart: `0`, srcLength, options);
4149	}
4150
4151	inline int8_t
4152	UnicodeString::caseCompare(int32_t start,
4153	int32_t _length,
4154	const UnicodeString &srcText,
4155	int32_t srcStart,
4156	int32_t srcLength,
4157	uint32_t options) const {
4158	return doCaseCompare(start, thisLength: _length, srcText, srcStart, srcLength, options);
4159	}
4160
4161	inline int8_t
4162	UnicodeString::caseCompare(int32_t start,
4163	int32_t _length,
4164	const char16_t *srcChars,
4165	uint32_t options) const {
4166	return doCaseCompare(start, length: _length, srcChars, srcStart: `0`, srcLength: _length, options);
4167	}
4168
4169	inline int8_t
4170	UnicodeString::caseCompare(int32_t start,
4171	int32_t _length,
4172	const char16_t *srcChars,
4173	int32_t srcStart,
4174	int32_t srcLength,
4175	uint32_t options) const {
4176	return doCaseCompare(start, length: _length, srcChars, srcStart, srcLength, options);
4177	}
4178
4179	inline int8_t
4180	UnicodeString::caseCompareBetween(int32_t start,
4181	int32_t limit,
4182	const UnicodeString &srcText,
4183	int32_t srcStart,
4184	int32_t srcLimit,
4185	uint32_t options) const {
4186	return doCaseCompare(start, thisLength: limit - start, srcText, srcStart, srcLength: srcLimit - srcStart, options);
4187	}
4188
4189	inline int32_t
4190	UnicodeString::indexOf(const UnicodeString& srcText,
4191	int32_t srcStart,
4192	int32_t srcLength,
4193	int32_t start,
4194	int32_t _length) const
4195	{
4196	if(!srcText.isBogus()) {
4197	srcText.pinIndices(start&: srcStart, length&: srcLength);
4198	if(srcLength > `0`) {
4199	return indexOf(srcChars: srcText.getArrayStart(), srcStart, srcLength, start, length: _length);
4200	}
4201	}
4202	return -`1`;
4203	}
4204
4205	inline int32_t
4206	UnicodeString::indexOf(const UnicodeString& text) const
4207	{ return indexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start: `0`, length: length()); }
4208
4209	inline int32_t
4210	UnicodeString::indexOf(const UnicodeString& text,
4211	int32_t start) const {
4212	pinIndex(start);
4213	return indexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start, length: length() - start);
4214	}
4215
4216	inline int32_t
4217	UnicodeString::indexOf(const UnicodeString& text,
4218	int32_t start,
4219	int32_t _length) const
4220	{ return indexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start, _length); }
4221
4222	inline int32_t
4223	UnicodeString::indexOf(const char16_t *srcChars,
4224	int32_t srcLength,
4225	int32_t start) const {
4226	pinIndex(start);
4227	return indexOf(srcChars, srcStart: `0`, srcLength, start, length: length() - start);
4228	}
4229
4230	inline int32_t
4231	UnicodeString::indexOf(ConstChar16Ptr srcChars,
4232	int32_t srcLength,
4233	int32_t start,
4234	int32_t _length) const
4235	{ return indexOf(srcChars, srcStart: `0`, srcLength, start, length: _length); }
4236
4237	inline int32_t
4238	UnicodeString::indexOf(char16_t c,
4239	int32_t start,
4240	int32_t _length) const
4241	{ return doIndexOf(c, start, length: _length); }
4242
4243	inline int32_t
4244	UnicodeString::indexOf(UChar32 c,
4245	int32_t start,
4246	int32_t _length) const
4247	{ return doIndexOf(c, start, length: _length); }
4248
4249	inline int32_t
4250	UnicodeString::indexOf(char16_t c) const
4251	{ return doIndexOf(c, start: `0`, length: length()); }
4252
4253	inline int32_t
4254	UnicodeString::indexOf(UChar32 c) const
4255	{ return indexOf(c, start: `0`, length: length()); }
4256
4257	inline int32_t
4258	UnicodeString::indexOf(char16_t c,
4259	int32_t start) const {
4260	pinIndex(start);
4261	return doIndexOf(c, start, length: length() - start);
4262	}
4263
4264	inline int32_t
4265	UnicodeString::indexOf(UChar32 c,
4266	int32_t start) const {
4267	pinIndex(start);
4268	return indexOf(c, start, length: length() - start);
4269	}
4270
4271	inline int32_t
4272	UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4273	int32_t srcLength,
4274	int32_t start,
4275	int32_t _length) const
4276	{ return lastIndexOf(srcChars, srcStart: `0`, srcLength, start, length: _length); }
4277
4278	inline int32_t
4279	UnicodeString::lastIndexOf(const char16_t *srcChars,
4280	int32_t srcLength,
4281	int32_t start) const {
4282	pinIndex(start);
4283	return lastIndexOf(srcChars, srcStart: `0`, srcLength, start, length: length() - start);
4284	}
4285
4286	inline int32_t
4287	UnicodeString::lastIndexOf(const UnicodeString& srcText,
4288	int32_t srcStart,
4289	int32_t srcLength,
4290	int32_t start,
4291	int32_t _length) const
4292	{
4293	if(!srcText.isBogus()) {
4294	srcText.pinIndices(start&: srcStart, length&: srcLength);
4295	if(srcLength > `0`) {
4296	return lastIndexOf(srcChars: srcText.getArrayStart(), srcStart, srcLength, start, length: _length);
4297	}
4298	}
4299	return -`1`;
4300	}
4301
4302	inline int32_t
4303	UnicodeString::lastIndexOf(const UnicodeString& text,
4304	int32_t start,
4305	int32_t _length) const
4306	{ return lastIndexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start, _length); }
4307
4308	inline int32_t
4309	UnicodeString::lastIndexOf(const UnicodeString& text,
4310	int32_t start) const {
4311	pinIndex(start);
4312	return lastIndexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start, length: length() - start);
4313	}
4314
4315	inline int32_t
4316	UnicodeString::lastIndexOf(const UnicodeString& text) const
4317	{ return lastIndexOf(srcText: text, srcStart: `0`, srcLength: text.length(), start: `0`, length: length()); }
4318
4319	inline int32_t
4320	UnicodeString::lastIndexOf(char16_t c,
4321	int32_t start,
4322	int32_t _length) const
4323	{ return doLastIndexOf(c, start, length: _length); }
4324
4325	inline int32_t
4326	UnicodeString::lastIndexOf(UChar32 c,
4327	int32_t start,
4328	int32_t _length) const {
4329	return doLastIndexOf(c, start, length: _length);
4330	}
4331
4332	inline int32_t
4333	UnicodeString::lastIndexOf(char16_t c) const
4334	{ return doLastIndexOf(c, start: `0`, length: length()); }
4335
4336	inline int32_t
4337	UnicodeString::lastIndexOf(UChar32 c) const {
4338	return lastIndexOf(c, start: `0`, length: length());
4339	}
4340
4341	inline int32_t
4342	UnicodeString::lastIndexOf(char16_t c,
4343	int32_t start) const {
4344	pinIndex(start);
4345	return doLastIndexOf(c, start, length: length() - start);
4346	}
4347
4348	inline int32_t
4349	UnicodeString::lastIndexOf(UChar32 c,
4350	int32_t start) const {
4351	pinIndex(start);
4352	return lastIndexOf(c, start, length: length() - start);
4353	}
4354
4355	inline UBool
4356	UnicodeString::startsWith(const UnicodeString& text) const
4357	{ return doEqualsSubstring(start: `0`, thisLength: text.length(), srcText: text, srcStart: `0`, srcLength: text.length()); }
4358
4359	inline UBool
4360	UnicodeString::startsWith(const UnicodeString& srcText,
4361	int32_t srcStart,
4362	int32_t srcLength) const
4363	{ return doEqualsSubstring(start: `0`, thisLength: srcLength, srcText, srcStart, srcLength); }
4364
4365	inline UBool
4366	UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4367	if(srcLength < `0`) {
4368	srcLength = u_strlen(s: toUCharPtr(p: srcChars));
4369	}
4370	return doEqualsSubstring(start: `0`, length: srcLength, srcChars, srcStart: `0`, srcLength);
4371	}
4372
4373	inline UBool
4374	UnicodeString::startsWith(const char16_t srcChars, int32_t srcStart, int32_t srcLength) const* {
4375	if(srcLength < `0`) {
4376	srcLength = u_strlen(s: toUCharPtr(p: srcChars));
4377	}
4378	return doEqualsSubstring(start: `0`, length: srcLength, srcChars, srcStart, srcLength);
4379	}
4380
4381	inline UBool
4382	UnicodeString::endsWith(const UnicodeString& text) const
4383	{ return doEqualsSubstring(start: length() - text.length(), thisLength: text.length(),
4384	srcText: text, srcStart: `0`, srcLength: text.length()); }
4385
4386	inline UBool
4387	UnicodeString::endsWith(const UnicodeString& srcText,
4388	int32_t srcStart,
4389	int32_t srcLength) const {
4390	srcText.pinIndices(start&: srcStart, length&: srcLength);
4391	return doEqualsSubstring(start: length() - srcLength, thisLength: srcLength,
4392	srcText, srcStart, srcLength);
4393	}
4394
4395	inline UBool
4396	UnicodeString::endsWith(ConstChar16Ptr srcChars,
4397	int32_t srcLength) const {
4398	if(srcLength < `0`) {
4399	srcLength = u_strlen(s: toUCharPtr(p: srcChars));
4400	}
4401	return doEqualsSubstring(start: length() - srcLength, length: srcLength, srcChars, srcStart: `0`, srcLength);
4402	}
4403
4404	inline UBool
4405	UnicodeString::endsWith(const char16_t *srcChars,
4406	int32_t srcStart,
4407	int32_t srcLength) const {
4408	if(srcLength < `0`) {
4409	srcLength = u_strlen(s: toUCharPtr(p: srcChars + srcStart));
4410	}
4411	return doEqualsSubstring(start: length() - srcLength, length: srcLength,
4412	srcChars, srcStart, srcLength);
4413	}
4414
4415	//========================================
4416	// replace
4417	//========================================
4418	inline UnicodeString&
4419	UnicodeString::replace(int32_t start,
4420	int32_t _length,
4421	const UnicodeString& srcText)
4422	{ return doReplace(start, length: _length, srcText, srcStart: `0`, srcLength: srcText.length()); }
4423
4424	inline UnicodeString&
4425	UnicodeString::replace(int32_t start,
4426	int32_t _length,
4427	const UnicodeString& srcText,
4428	int32_t srcStart,
4429	int32_t srcLength)
4430	{ return doReplace(start, length: _length, srcText, srcStart, srcLength); }
4431
4432	inline UnicodeString&
4433	UnicodeString::replace(int32_t start,
4434	int32_t _length,
4435	ConstChar16Ptr srcChars,
4436	int32_t srcLength)
4437	{ return doReplace(start, length: _length, srcChars, srcStart: `0`, srcLength); }
4438
4439	inline UnicodeString&
4440	UnicodeString::replace(int32_t start,
4441	int32_t _length,
4442	const char16_t *srcChars,
4443	int32_t srcStart,
4444	int32_t srcLength)
4445	{ return doReplace(start, length: _length, srcChars, srcStart, srcLength); }
4446
4447	inline UnicodeString&
4448	UnicodeString::replace(int32_t start,
4449	int32_t _length,
4450	char16_t srcChar)
4451	{ return doReplace(start, length: _length, srcChars: &srcChar, srcStart: `0`, srcLength: `1`); }
4452
4453	inline UnicodeString&
4454	UnicodeString::replaceBetween(int32_t start,
4455	int32_t limit,
4456	const UnicodeString& srcText)
4457	{ return doReplace(start, length: limit - start, srcText, srcStart: `0`, srcLength: srcText.length()); }
4458
4459	inline UnicodeString&
4460	UnicodeString::replaceBetween(int32_t start,
4461	int32_t limit,
4462	const UnicodeString& srcText,
4463	int32_t srcStart,
4464	int32_t srcLimit)
4465	{ return doReplace(start, length: limit - start, srcText, srcStart, srcLength: srcLimit - srcStart); }
4466
4467	inline UnicodeString&
4468	UnicodeString::findAndReplace(const UnicodeString& oldText,
4469	const UnicodeString& newText)
4470	{ return findAndReplace(start: `0`, length: length(), oldText, oldStart: `0`, oldLength: oldText.length(),
4471	newText, newStart: `0`, newLength: newText.length()); }
4472
4473	inline UnicodeString&
4474	UnicodeString::findAndReplace(int32_t start,
4475	int32_t _length,
4476	const UnicodeString& oldText,
4477	const UnicodeString& newText)
4478	{ return findAndReplace(start, length: _length, oldText, oldStart: `0`, oldLength: oldText.length(),
4479	newText, newStart: `0`, newLength: newText.length()); }
4480
4481	// ============================
4482	// extract
4483	// ============================
4484	inline void
4485	UnicodeString::doExtract(int32_t start,
4486	int32_t _length,
4487	UnicodeString& target) const
4488	{ target.replace(start: `0`, length: target.length(), srcText: *this, srcStart: start, srcLength: _length); }
4489
4490	inline void
4491	UnicodeString::extract(int32_t start,
4492	int32_t _length,
4493	Char16Ptr target,
4494	int32_t targetStart) const
4495	{ doExtract(start, length: _length, dst: target, dstStart: targetStart); }
4496
4497	inline void
4498	UnicodeString::extract(int32_t start,
4499	int32_t _length,
4500	UnicodeString& target) const
4501	{ doExtract(start, _length, target); }
4502
4503	#if !UCONFIG_NO_CONVERSION
4504
4505	inline int32_t
4506	UnicodeString::extract(int32_t start,
4507	int32_t _length,
4508	char *dst,
4509	const char codepage) const*
4510
4511	{
4512	// This dstSize value will be checked explicitly
4513	return extract(start, startLength: _length, target: dst, targetLength: dst!=`0` ? `0xffffffff` : `0`, codepage);
4514	}
4515
4516	#endif
4517
4518	inline void
4519	UnicodeString::extractBetween(int32_t start,
4520	int32_t limit,
4521	char16_t *dst,
4522	int32_t dstStart) const {
4523	pinIndex(start);
4524	pinIndex(start&: limit);
4525	doExtract(start, length: limit - start, dst, dstStart);
4526	}
4527
4528	inline UnicodeString
4529	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4530	return tempSubString(start, length: limit - start);
4531	}
4532
4533	inline char16_t
4534	UnicodeString::doCharAt(int32_t offset) const
4535	{
4536	if((uint32_t)offset < (uint32_t)length()) {
4537	return getArrayStart()[offset];
4538	} else {
4539	return kInvalidUChar;
4540	}
4541	}
4542
4543	inline char16_t
4544	UnicodeString::charAt(int32_t offset) const
4545	{ return doCharAt(offset); }
4546
4547	inline char16_t
4548	UnicodeString::operator[] (int32_t offset) const
4549	{ return doCharAt(offset); }
4550
4551	inline UBool
4552	UnicodeString::isEmpty() const {
4553	// Arithmetic or logical right shift does not matter: only testing for 0.
4554	return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == `0`;
4555	}
4556
4557	//========================================
4558	// Write implementation methods
4559	//========================================
4560	inline void
4561	UnicodeString::setZeroLength() {
4562	fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4563	}
4564
4565	inline void
4566	UnicodeString::setShortLength(int32_t len) {
4567	// requires 0 <= len <= kMaxShortLength
4568	fUnion.fFields.fLengthAndFlags =
4569	(int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) \| (len << kLengthShift));
4570	}
4571
4572	inline void
4573	UnicodeString::setLength(int32_t len) {
4574	if(len <= kMaxShortLength) {
4575	setShortLength(len);
4576	} else {
4577	fUnion.fFields.fLengthAndFlags \|= kLengthIsLarge;
4578	fUnion.fFields.fLength = len;
4579	}
4580	}
4581
4582	inline void
4583	UnicodeString::setToEmpty() {
4584	fUnion.fFields.fLengthAndFlags = kShortString;
4585	}
4586
4587	inline void
4588	UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4589	setLength(len);
4590	fUnion.fFields.fArray = array;
4591	fUnion.fFields.fCapacity = capacity;
4592	}
4593
4594	inline UnicodeString&
4595	UnicodeString::operator= (char16_t ch)
4596	{ return doReplace(start: `0`, length: length(), srcChars: &ch, srcStart: `0`, srcLength: `1`); }
4597
4598	inline UnicodeString&
4599	UnicodeString::operator= (UChar32 ch)
4600	{ return replace(start: `0`, length: length(), srcChar: ch); }
4601
4602	inline UnicodeString&
4603	UnicodeString::setTo(const UnicodeString& srcText,
4604	int32_t srcStart,
4605	int32_t srcLength)
4606	{
4607	unBogus();
4608	return doReplace(start: `0`, length: length(), srcText, srcStart, srcLength);
4609	}
4610
4611	inline UnicodeString&
4612	UnicodeString::setTo(const UnicodeString& srcText,
4613	int32_t srcStart)
4614	{
4615	unBogus();
4616	srcText.pinIndex(start&: srcStart);
4617	return doReplace(start: `0`, length: length(), srcText, srcStart, srcLength: srcText.length() - srcStart);
4618	}
4619
4620	inline UnicodeString&
4621	UnicodeString::setTo(const UnicodeString& srcText)
4622	{
4623	return copyFrom(src: srcText);
4624	}
4625
4626	inline UnicodeString&
4627	UnicodeString::setTo(const char16_t *srcChars,
4628	int32_t srcLength)
4629	{
4630	unBogus();
4631	return doReplace(start: `0`, length: length(), srcChars, srcStart: `0`, srcLength);
4632	}
4633
4634	inline UnicodeString&
4635	UnicodeString::setTo(char16_t srcChar)
4636	{
4637	unBogus();
4638	return doReplace(start: `0`, length: length(), srcChars: &srcChar, srcStart: `0`, srcLength: `1`);
4639	}
4640
4641	inline UnicodeString&
4642	UnicodeString::setTo(UChar32 srcChar)
4643	{
4644	unBogus();
4645	return replace(start: `0`, length: length(), srcChar);
4646	}
4647
4648	inline UnicodeString&
4649	UnicodeString::append(const UnicodeString& srcText,
4650	int32_t srcStart,
4651	int32_t srcLength)
4652	{ return doAppend(src: srcText, srcStart, srcLength); }
4653
4654	inline UnicodeString&
4655	UnicodeString::append(const UnicodeString& srcText)
4656	{ return doAppend(src: srcText, srcStart: `0`, srcLength: srcText.length()); }
4657
4658	inline UnicodeString&
4659	UnicodeString::append(const char16_t *srcChars,
4660	int32_t srcStart,
4661	int32_t srcLength)
4662	{ return doAppend(srcChars, srcStart, srcLength); }
4663
4664	inline UnicodeString&
4665	UnicodeString::append(ConstChar16Ptr srcChars,
4666	int32_t srcLength)
4667	{ return doAppend(srcChars, srcStart: `0`, srcLength); }
4668
4669	inline UnicodeString&
4670	UnicodeString::append(char16_t srcChar)
4671	{ return doAppend(srcChars: &srcChar, srcStart: `0`, srcLength: `1`); }
4672
4673	inline UnicodeString&
4674	UnicodeString::operator+= (char16_t ch)
4675	{ return doAppend(srcChars: &ch, srcStart: `0`, srcLength: `1`); }
4676
4677	inline UnicodeString&
4678	UnicodeString::operator+= (UChar32 ch) {
4679	return append(srcChar: ch);
4680	}
4681
4682	inline UnicodeString&
4683	UnicodeString::operator+= (const UnicodeString& srcText)
4684	{ return doAppend(src: srcText, srcStart: `0`, srcLength: srcText.length()); }
4685
4686	inline UnicodeString&
4687	UnicodeString::insert(int32_t start,
4688	const UnicodeString& srcText,
4689	int32_t srcStart,
4690	int32_t srcLength)
4691	{ return doReplace(start, length: `0`, srcText, srcStart, srcLength); }
4692
4693	inline UnicodeString&
4694	UnicodeString::insert(int32_t start,
4695	const UnicodeString& srcText)
4696	{ return doReplace(start, length: `0`, srcText, srcStart: `0`, srcLength: srcText.length()); }
4697
4698	inline UnicodeString&
4699	UnicodeString::insert(int32_t start,
4700	const char16_t *srcChars,
4701	int32_t srcStart,
4702	int32_t srcLength)
4703	{ return doReplace(start, length: `0`, srcChars, srcStart, srcLength); }
4704
4705	inline UnicodeString&
4706	UnicodeString::insert(int32_t start,
4707	ConstChar16Ptr srcChars,
4708	int32_t srcLength)
4709	{ return doReplace(start, length: `0`, srcChars, srcStart: `0`, srcLength); }
4710
4711	inline UnicodeString&
4712	UnicodeString::insert(int32_t start,
4713	char16_t srcChar)
4714	{ return doReplace(start, length: `0`, srcChars: &srcChar, srcStart: `0`, srcLength: `1`); }
4715
4716	inline UnicodeString&
4717	UnicodeString::insert(int32_t start,
4718	UChar32 srcChar)
4719	{ return replace(start, length: `0`, srcChar); }
4720
4721
4722	inline UnicodeString&
4723	UnicodeString::remove()
4724	{
4725	// remove() of a bogus string makes the string empty and non-bogus
4726	if(isBogus()) {
4727	setToEmpty();
4728	} else {
4729	setZeroLength();
4730	}
4731	return *this;
4732	}
4733
4734	inline UnicodeString&
4735	UnicodeString::remove(int32_t start,
4736	int32_t _length)
4737	{
4738	if(start <= `0` && _length == INT32_MAX) {
4739	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4740	return remove();
4741	}
4742	return doReplace(start, length: _length, srcChars: nullptr, srcStart: `0`, srcLength: `0`);
4743	}
4744
4745	inline UnicodeString&
4746	UnicodeString::removeBetween(int32_t start,
4747	int32_t limit)
4748	{ return doReplace(start, length: limit - start, srcChars: nullptr, srcStart: `0`, srcLength: `0`); }
4749
4750	inline UnicodeString &
4751	UnicodeString::retainBetween(int32_t start, int32_t limit) {
4752	truncate(targetLength: limit);
4753	return doReplace(start: `0`, length: start, srcChars: nullptr, srcStart: `0`, srcLength: `0`);
4754	}
4755
4756	inline UBool
4757	UnicodeString::truncate(int32_t targetLength)
4758	{
4759	if(isBogus() && targetLength == `0`) {
4760	// truncate(0) of a bogus string makes the string empty and non-bogus
4761	unBogus();
4762	return false;
4763	} else if((uint32_t)targetLength < (uint32_t)length()) {
4764	setLength(targetLength);
4765	return true;
4766	} else {
4767	return false;
4768	}
4769	}
4770
4771	inline UnicodeString&
4772	UnicodeString::reverse()
4773	{ return doReverse(start: `0`, length: length()); }
4774
4775	inline UnicodeString&
4776	UnicodeString::reverse(int32_t start,
4777	int32_t _length)
4778	{ return doReverse(start, length: _length); }
4779
4780	U_NAMESPACE_END
4781
4782	#endif /* U_SHOW_CPLUSPLUS_API */
4783
4784	#endif
4785

source code of include/unicode/unistr.h