1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5
6#include "qstringlist.h"
7#if QT_CONFIG(regularexpression)
8#include "qregularexpression.h"
9#endif
10#include "qunicodetables_p.h"
11#include <private/qstringconverter_p.h>
12#include <private/qtools_p.h>
13#include "qlocale_tools_p.h"
14#include "private/qsimd_p.h"
15#include <qnumeric.h>
16#include <qdatastream.h>
17#include <qlist.h>
18#include "qlocale.h"
19#include "qlocale_p.h"
20#include "qspan.h"
21#include "qstringbuilder.h"
22#include "qstringmatcher.h"
23#include "qvarlengtharray.h"
24#include "qdebug.h"
25#include "qendian.h"
26#include "qcollator.h"
27#include "qttypetraits.h"
28
29#ifdef Q_OS_DARWIN
30#include <private/qcore_mac_p.h>
31#endif
32
33#include <private/qfunctions_p.h>
34
35#include <limits.h>
36#include <string.h>
37#include <stdlib.h>
38#include <stdio.h>
39#include <stdarg.h>
40#include <wchar.h>
41
42#include "qchar.cpp"
43#include "qlatin1stringmatcher.h"
44#include "qstringmatcher.cpp"
45#include "qstringiterator_p.h"
46#include "qstringalgorithms_p.h"
47#include "qthreadstorage.h"
48
49#include <algorithm>
50#include <functional>
51
52#ifdef Q_OS_WIN
53# include <qt_windows.h>
54# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
55// MSVC requires this, but let's apply it to MinGW compilers too, just in case
56# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, " \
57 "otherwise some QString functions will not get exported."
58# endif
59#endif
60
61#ifdef truncate
62# undef truncate
63#endif
64
// Rolling-hash step used by the backwards substring search in qLastIndexOf().
// Two variables must be in scope where the macro is expanded: hashHaystack
// (the running window hash) and sl_minus_1 (needle length minus one).
// The macro removes the contribution of code unit \a a, which carries the
// weight 2^sl_minus_1, and then doubles the hash. The guard skips the
// subtraction when the shift count would reach the bit width of sl_minus_1.
// Note that only the subtraction is conditional; the final "<<= 1" always
// runs. The expansion has no trailing semicolon, so write "REHASH(x);".
65#define REHASH(a) \
66 if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
67 hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
68 hashHaystack <<= 1
69
70QT_BEGIN_NAMESPACE
71
72using namespace Qt::StringLiterals;
73using namespace QtMiscUtils;
74
75const char16_t QString::_empty = 0;
76
77// in qstringmatcher.cpp
78qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
79
80namespace {
// Template selector for the SIMD string comparison helper: tells it whether
// the caller only needs "equal / not equal" or a signed ordering result.
enum StringComparisonMode {
    CompareStringsForEquality = 0, // a mismatch may be reported as any non-zero value
    CompareStringsForOrdering = 1  // a mismatch is reported as a signed difference
};
85
86template <typename Pointer>
87char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
88
89template <>
90char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
91{
92 return foldCase(ch: reinterpret_cast<const char16_t*>(ch),
93 start: reinterpret_cast<const char16_t*>(start));
94}
95
96template <>
97char32_t foldCaseHelper<const char*>(const char* ch, const char*)
98{
99 return foldCase(ch: char16_t(uchar(*ch)));
100}
101
102template <typename T>
103char16_t valueTypeToUtf16(T t) = delete;
104
105template <>
106char16_t valueTypeToUtf16<QChar>(QChar t)
107{
108 return t.unicode();
109}
110
111template <>
112char16_t valueTypeToUtf16<char>(char t)
113{
114 return char16_t{uchar(t)};
115}
116
// Returns true if the case-folded form of \a a equals \a b. Only \a a is
// folded; \a b is compared as passed in, so callers hand in an already folded
// value.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
122
123/*!
124 \internal
125
126 Returns the index position of the first occurrence of the
127 character \a ch in the string given by \a str and \a len,
128 searching forward from index
129 position \a from. Returns -1 if \a ch could not be found.
130*/
131template <typename Haystack>
132static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
133 qsizetype from, Qt::CaseSensitivity cs) noexcept
134{
135 if (haystack.size() == 0)
136 return -1;
137 if (from < 0)
138 from += haystack.size();
139 else if (std::size_t(from) > std::size_t(haystack.size()))
140 from = haystack.size() - 1;
141 if (from >= 0) {
142 char16_t c = needle.unicode();
143 const auto b = haystack.data();
144 auto n = b + from;
145 if (cs == Qt::CaseSensitive) {
146 for (; n >= b; --n)
147 if (valueTypeToUtf16(*n) == c)
148 return n - b;
149 } else {
150 c = foldCase(ch: c);
151 for (; n >= b; --n)
152 if (foldCase(valueTypeToUtf16(*n)) == c)
153 return n - b;
154 }
155 }
156 return -1;
157}
158template <> qsizetype
159qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
160
// Backwards substring search. Returns the index of the last occurrence of
// needle0 in haystack0 that starts at or before position from, or -1 if there
// is none. A negative from is taken relative to the end of the haystack.
// A needle of exactly one element is delegated to the single-character
// overload. Otherwise the function keeps a rolling hash over a needle-sized
// window of the haystack and only performs the full
// QtPrivate::compareStrings() check when the window hash equals the needle
// hash.
161template<typename Haystack, typename Needle>
162static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
163 Needle needle0, Qt::CaseSensitivity cs) noexcept
164{
165 const qsizetype sl = needle0.size();
166 if (sl == 1)
167 return qLastIndexOf(haystack0, needle0.front(), from, cs);
168
169 const qsizetype l = haystack0.size();
170 if (from < 0)
171 from += l;
 // an empty needle matches at the very end of the haystack
172 if (from == l && sl == 0)
173 return from;
 // delta is the last index at which the needle still fits
174 const qsizetype delta = l - sl;
 // the unsigned comparison also rejects a from that is still negative
175 if (std::size_t(from) > std::size_t(l) || delta < 0)
176 return -1;
177 if (from > delta)
178 from = delta;
179
 // wraps a pointer into the haystack as a needle-sized view for the full comparison
180 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
181
182 auto haystack = haystack0.data();
183 const auto needle = needle0.data();
 // despite its name, end is the first element of the haystack: the
 // backwards scan stops once the window has moved past it
184 const auto *end = haystack;
185 haystack += from;
186 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
 // n and h point at the last element of the needle and of the first window
187 const auto *n = needle + sl_minus_1;
188 const auto *h = haystack + sl_minus_1;
189 qregisteruint hashNeedle = 0, hashHaystack = 0;
190
191 if (cs == Qt::CaseSensitive) {
 // hash the needle and the initial window, walking both from their
 // last element towards their first
192 for (qsizetype idx = 0; idx < sl; ++idx) {
193 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
194 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
195 }
 // take the window's first element back out; the loop below adds the
 // current first element at the top of every iteration
196 hashHaystack -= valueTypeToUtf16(*haystack);
197
198 while (haystack >= end) {
199 hashHaystack += valueTypeToUtf16(*haystack);
200 if (hashHaystack == hashNeedle
201 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
202 return haystack - end;
 // slide the window one position towards the start and drop the
 // element that just left it at the far end
203 --haystack;
204 REHASH(valueTypeToUtf16(haystack[sl]));
205 }
206 } else {
 // same scheme as above, but hashing case-folded code points
207 for (qsizetype idx = 0; idx < sl; ++idx) {
208 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
209 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
210 }
211 hashHaystack -= foldCaseHelper(haystack, end);
212
213 while (haystack >= end) {
214 hashHaystack += foldCaseHelper(haystack, end);
215 if (hashHaystack == hashNeedle
216 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
217 return haystack - end;
218 --haystack;
219 REHASH(foldCaseHelper(haystack + sl, end));
220 }
221 }
222 return -1;
223}
224
225template <typename Haystack, typename Needle>
226bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
227{
228 if (haystack.isNull())
229 return needle.isNull();
230 const auto haystackLen = haystack.size();
231 const auto needleLen = needle.size();
232 if (haystackLen == 0)
233 return needleLen == 0;
234 if (needleLen > haystackLen)
235 return false;
236
237 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
238}
239
240template <typename Haystack, typename Needle>
241bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
242{
243 if (haystack.isNull())
244 return needle.isNull();
245 const auto haystackLen = haystack.size();
246 const auto needleLen = needle.size();
247 if (haystackLen == 0)
248 return needleLen == 0;
249 if (haystackLen < needleLen)
250 return false;
251
252 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
253}
254
255template <typename T>
256static void append_helper(QString &self, T view)
257{
258 const auto strData = view.data();
259 const qsizetype strSize = view.size();
260 auto &d = self.data_ptr();
261 if (strData && strSize > 0) {
262 // the number of UTF-8 code units is always at a minimum equal to the number
263 // of equivalent UTF-16 code units
264 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: strSize, data: nullptr, old: nullptr);
265 Q_CHECK_PTR(d.data());
266 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
267
268 auto dst = std::next(x: d.data(), n: d.size);
269 if constexpr (std::is_same_v<T, QUtf8StringView>) {
270 dst = QUtf8::convertToUnicode(dst, view);
271 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
272 QLatin1::convertToUnicode(dst, view);
273 dst += strSize;
274 } else {
275 static_assert(QtPrivate::type_dependent_false<T>(),
276 "Can only operate on UTF-8 and Latin-1");
277 }
278 self.resize(size: std::distance(first: d.begin(), last: dst));
279 } else if (d.isNull() && !view.isNull()) { // special case
280 self = QLatin1StringView("");
281 }
282}
283
// Loop that is unrolled at compile time through template recursion, meant for
// the short tails left over after vectorized processing. At most MaxCount
// iterations are performed, no matter how large the requested count is.
template <unsigned int MaxCount> struct UnrollTailLoop
{
    /* Behaves like:
     *     for (Number k = i; count != 0; --count, ++k) {
     *         if (loopCheck(k))
     *             return returnIfFailed(k);
     *     }
     *     return returnIfExited;
     * except that the loop gives up after MaxCount iterations and returns
     * returnIfExited at that point as well.
     */
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        if (count == 0)
            return returnIfExited;

        if (loopCheck(i))
            return returnIfFailed(i);

        // the rest of the loop is the same loop with one iteration less
        using Remaining = UnrollTailLoop<MaxCount - 1>;
        return Remaining::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
    }

    /* Behaves like:
     *     for (Number i = 0; i < count; ++i)
     *         code(i);
     * again limited to MaxCount iterations.
     */
    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        // run the body and never ask for an early exit
        const auto runBody = [=](Number i) -> bool { code(i); return false; };
        const auto neverCalled = [](Number) { return 0; };
        exec(count, 0, runBody, neverCalled);
    }
};
// End of the recursion: no iterations left to unroll.
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
322} // unnamed namespace
323
324/*
325 * Note on the use of SIMD in qstring.cpp:
326 *
327 * Several operations with strings are improved with the use of SIMD code,
328 * since they are repetitive. For MIPS, we have hand-written assembly code
329 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
330 * x86, we can only use intrinsics and therefore everything is contained in
331 * qstring.cpp. We need to use intrinsics only for those platforms due to the
332 * different compilers and toolchains used, which have different syntax for
333 * assembly sources.
334 *
335 * ** SSE notes: **
336 *
337 * Whenever multiple alternatives are equivalent or near so, we prefer the one
338 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
339 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
340 * SSE versions should be done when there is a clear performance benefit and
341 * requires fallback code to SSE2, if it exists.
342 *
343 * Performance measurement in the past shows that most strings are short in
344 * size and, therefore, do not benefit from alignment prologues. That is,
345 * trying to find a 16-byte-aligned boundary to operate on is often more
346 * expensive than executing the unaligned operation directly. In addition, note
347 * that the QString private data is designed so that the data is stored on
348 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
349 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
350 * 50% of the time), so skipping the alignment prologue is actually optimizing
351 * for the common case.
352 */
353
354#if defined(__mips_dsp)
355// From qstring_mips_dsp_asm.S
356extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
357extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
358extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
359#endif
360
// ATTRIBUTE_NO_SANITIZE: function attribute that switches off the address and
// thread sanitizers for a single function. It expands to the attribute only
// when both __SSE2__ and Q_CC_GNU are defined, and to nothing otherwise.
361#if defined(__SSE2__) && defined(Q_CC_GNU)
362// We may overrun the buffer, but that's a false positive:
363// this won't crash nor produce incorrect results
364# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
365#else
366# define ATTRIBUTE_NO_SANITIZE
367#endif
368
369#ifdef __SSE2__
370static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
371static constexpr bool UseAvx2 = UseSse4_1 &&
372 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
373
374[[maybe_unused]]
375static Q_ALWAYS_INLINE __m128i mm_load8_zero_extend(const void *ptr)
376{
377 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
378 if constexpr (UseSse4_1) {
379 // use a MOVQ followed by PMOVZXBW
380 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
381 __m128i data = _mm_loadl_epi64(p: dataptr);
382 return _mm_cvtepu8_epi16(V: data);
383 }
384
385 // use MOVQ followed by PUNPCKLBW
386 __m128i data = _mm_loadl_epi64(p: dataptr);
387 return _mm_unpacklo_epi8(a: data, b: _mm_setzero_si128());
388}
389
// SSE2 implementation of a UTF-16 strlen(): returns the number of char16_t
// code units before the first null code unit in \a str. It only ever issues
// 16-byte-aligned loads, which means it may read bytes before \a str and
// after the terminator; that is why the function carries
// ATTRIBUTE_NO_SANITIZE.
390[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
391static qsizetype qustrlen_sse2(const char16_t *str) noexcept
392{
393 // find the 16-byte alignment immediately prior or equal to str
394 quintptr misalignment = quintptr(str) & 0xf;
395 Q_ASSERT((misalignment & 1) == 0);
396 const char16_t *ptr = str - (misalignment / 2);
397
398 // load 16 bytes and see if we have a null
399 // (aligned loads can never segfault)
400 const __m128i zeroes = _mm_setzero_si128();
401 __m128i data = _mm_load_si128(p: reinterpret_cast<const __m128i *>(ptr));
402 __m128i comparison = _mm_cmpeq_epi16(a: data, b: zeroes);
 // two mask bits per 16-bit lane, set where the lane is zero
403 uint mask = _mm_movemask_epi8(a: comparison);
404
405 // ignore the result prior to the beginning of str
406 mask >>= misalignment;
407
408 // Have we found something in the first block? Need to handle it now
409 // because of the right shift above.
410 if (mask)
411 return qCountTrailingZeroBits(v: mask) / sizeof(char16_t);
412
413 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
 // number of code units of str that the first block covered; str + size
 // is therefore the next 16-byte boundary
414 qsizetype size = Step - misalignment / sizeof(char16_t);
415
 // pre-decrement so that the first loop iteration starts at that boundary
416 size -= Step;
417 do {
418 size += Step;
419 data = _mm_load_si128(p: reinterpret_cast<const __m128i *>(str + size));
420
421 comparison = _mm_cmpeq_epi16(a: data, b: zeroes);
422 mask = _mm_movemask_epi8(a: comparison);
423 } while (mask == 0);
424
425 // found a null
426 return size + qCountTrailingZeroBits(v: mask) / sizeof(char16_t);
427}
428
429// Scans from \a ptr to \a end, one 16-bit word at a time, for a word that has
430// any of the bits in \a maskval set. Returns true if no such word was found.
431// Returns false and updates \a ptr to point to the first 16-bit word that has
432// a masked bit set (note: if the input is 8-bit, \a ptr may be updated to one byte short).
// Only whole groups of 8 bytes or more are examined: when the function
// returns true, \a ptr has been advanced past everything it looked at and up
// to 7 bytes before \a end are left for the caller to check.
433static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
434{
 // result has two bits set for every 16-bit word whose masked value is
 // zero; the first clear bit therefore marks the first offending word
435 auto updatePtr = [&](uint result) {
436 // found a character matching the mask
437 uint idx = qCountTrailingZeroBits(v: ~result);
438 ptr += idx;
439 return false;
440 };
441
442 if constexpr (UseSse4_1) {
443# ifndef Q_OS_QNX // compiler fails in the code below
444 __m128i mask;
 // locates the offending word inside a 16-byte block that is already
 // known to contain one
445 auto updatePtrSimd = [&](__m128i data) -> bool {
446 __m128i masked = _mm_and_si128(a: mask, b: data);
447 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
448 uint result = _mm_movemask_epi8(a: comparison);
449 return updatePtr(result);
450 };
451
452 if constexpr (UseAvx2) {
453 // AVX2 implementation: test 32 bytes at a time
454 const __m256i mask256 = _mm256_broadcastd_epi32(X: _mm_cvtsi32_si128(a: maskval));
455 while (ptr + 32 <= end) {
456 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(ptr));
457 if (!_mm256_testz_si256(a: mask256, b: data)) {
458 // found a character matching the mask
459 __m256i masked256 = _mm256_and_si256(a: mask256, b: data);
460 __m256i comparison256 = _mm256_cmpeq_epi16(a: masked256, b: _mm256_setzero_si256());
461 return updatePtr(_mm256_movemask_epi8(a: comparison256));
462 }
463 ptr += 32;
464 }
465
 // keep the low 128 bits of the mask for the shared tail code below
466 mask = _mm256_castsi256_si128(a: mask256);
467 } else {
468 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
469 // comparisons, unrolled)
470 mask = _mm_set1_epi32(i: maskval);
471 while (ptr + 32 <= end) {
472 __m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
473 __m128i data2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr + 16));
474 if (!_mm_testz_si128(M: mask, V: data1))
475 return updatePtrSimd(data1);
476
477 ptr += 16;
478 if (!_mm_testz_si128(M: mask, V: data2))
479 return updatePtrSimd(data2);
480 ptr += 16;
481 }
482 }
483
484 // AVX2 and SSE4.1: final 16-byte comparison
485 if (ptr + 16 <= end) {
486 __m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
487 if (!_mm_testz_si128(M: mask, V: data1))
488 return updatePtrSimd(data1);
489 ptr += 16;
490 }
491
492 // and final 8-byte comparison
493 if (ptr + 8 <= end) {
494 __m128i data1 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
495 if (!_mm_testz_si128(M: mask, V: data1))
496 return updatePtrSimd(data1);
497 ptr += 8;
498 }
499
500 return true;
501# endif // QNX
502 }
503
504 // SSE2 implementation: test 16 bytes at a time.
505 const __m128i mask = _mm_set1_epi32(i: maskval);
506 while (ptr + 16 <= end) {
507 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
508 __m128i masked = _mm_and_si128(a: mask, b: data);
509 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
 // all 16 bits set means every word in the block passed the test
510 quint16 result = _mm_movemask_epi8(a: comparison);
511 if (result != 0xffff)
512 return updatePtr(result);
513 ptr += 16;
514 }
515
516 // and one 8-byte comparison
517 if (ptr + 8 <= end) {
518 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
519 __m128i masked = _mm_and_si128(a: mask, b: data);
520 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
 // only the low 8 mask bits belong to the 8 bytes that were loaded
521 quint8 result = _mm_movemask_epi8(a: comparison);
522 if (result != 0xff)
523 return updatePtr(result);
524 ptr += 8;
525 }
526
527 return true;
528}
529
// SIMD comparison of the first \a l code units of the UTF-16 string \a a with
// the string \a b, whose elements are either 16-bit or 8-bit (8-bit input is
// zero-extended to 16 bits before comparing). Char must be an unsigned type.
// Returns 0 if all \a l elements are equal and a non-zero value otherwise.
// For CompareStringsForOrdering the non-zero value is the difference between
// the first pair of elements that differ; for CompareStringsForEquality the
// vectorized paths simply return 1.
530template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
531static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
532{
533 static_assert(std::is_unsigned_v<Char>);
534
535 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
536 // we compare. This lambda helps extract the code unit.
537 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
538 constexpr int Stride = 2;
539 // this is the same as:
540 // return n[idx / Stride];
541 // but using pointer arithmetic to avoid the compiler dividing by two
542 // and multiplying by two in the case of char16_t (we know idx is even,
543 // but the compiler does not). This is not UB.
544
545 auto ptr = reinterpret_cast<const uchar *>(n);
546 ptr += idx / (Stride / sizeof(*n));
547 return *reinterpret_cast<decltype(n)>(ptr);
548 };
 // Turns a mismatch mask (a set bit marks a differing byte) found at
 // element offset "offset" into the function's return value.
549 auto difference = [a, b](uint mask, qptrdiff offset) {
550 if (Mode == CompareStringsForEquality)
551 return 1;
552 uint idx = qCountTrailingZeroBits(v: mask);
553 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
554 };
555
 // Load 8 (respectively 4) elements as 16-bit lanes; 8-bit input is
 // widened by interleaving it with zero bytes.
556 static const auto load8Chars = [](const auto *ptr) {
557 if (sizeof(*ptr) == 2)
558 return _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
559 __m128i chunk = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
560 return _mm_unpacklo_epi8(a: chunk, b: _mm_setzero_si128());
561 };
562 static const auto load4Chars = [](const auto *ptr) {
563 if (sizeof(*ptr) == 2)
564 return _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
565 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
566 return _mm_unpacklo_epi8(a: chunk, b: _mm_setzero_si128());
567 };
568
569 // we're going to read a[0..15] and b[0..15] (32 bytes)
 // The returned mask has a bit set for every byte of a that compared equal.
570 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
571 if constexpr (UseAvx2) {
572 __m256i a_data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(a + offset));
573 __m256i b_data;
574 if (sizeof(Char) == 1) {
575 // expand to UTF-16 via zero-extension
576 __m128i chunk = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(b + offset));
577 b_data = _mm256_cvtepu8_epi16(V: chunk);
578 } else {
579 b_data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(b + offset));
580 }
581 __m256i result = _mm256_cmpeq_epi16(a: a_data, b: b_data);
582 return _mm256_movemask_epi8(a: result);
583 }
584
585 __m128i a_data1 = load8Chars(a + offset);
586 __m128i a_data2 = load8Chars(a + offset + 8);
587 __m128i b_data1, b_data2;
588 if (sizeof(Char) == 1) {
589 // expand to UTF-16 via unpacking
590 __m128i b_data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(b + offset));
591 b_data1 = _mm_unpacklo_epi8(a: b_data, b: _mm_setzero_si128());
592 b_data2 = _mm_unpackhi_epi8(a: b_data, b: _mm_setzero_si128());
593 } else {
594 b_data1 = load8Chars(b + offset);
595 b_data2 = load8Chars(b + offset + 8);
596 }
597 __m128i result1 = _mm_cmpeq_epi16(a: a_data1, b: b_data1);
598 __m128i result2 = _mm_cmpeq_epi16(a: a_data2, b: b_data2);
599 return _mm_movemask_epi8(a: result1) | _mm_movemask_epi8(a: result2) << 16;
600 };
601
 // 16 elements or more: whole 16-element chunks first
602 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
603 qptrdiff offset = 0;
604 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
 // invert: from here on a set bit marks a mismatch
605 uint mask = ~processChunk16Chars(offset);
606 if (mask)
607 return difference(mask, offset);
608 }
609
610 // maybe overlap the last 32 bytes
611 if (size_t(offset) < l) {
612 offset = l - sizeof(__m256i) / sizeof(char16_t);
613 uint mask = ~processChunk16Chars(offset);
614 return mask ? difference(mask, offset) : 0;
615 }
616 } else if (l >= 4) {
 // 4 to 15 elements: two loads of "width" elements each, one at the
 // start and one ending exactly at l; they may overlap in the middle
617 __m128i a_data1, b_data1;
618 __m128i a_data2, b_data2;
619 int width;
620 if (l >= 8) {
621 width = 8;
622 a_data1 = load8Chars(a);
623 b_data1 = load8Chars(b);
624 a_data2 = load8Chars(a + l - width);
625 b_data2 = load8Chars(b + l - width);
626 } else {
627 // we're going to read a[0..3] and b[0..3] (8 bytes)
628 width = 4;
629 a_data1 = load4Chars(a);
630 b_data1 = load4Chars(b);
631 a_data2 = load4Chars(a + l - width);
632 b_data2 = load4Chars(b + l - width);
633 }
634
635 __m128i result = _mm_cmpeq_epi16(a: a_data1, b: b_data1);
636 ushort mask = ~_mm_movemask_epi8(a: result);
637 if (mask)
638 return difference(mask, 0);
639
640 result = _mm_cmpeq_epi16(a: a_data2, b: b_data2);
641 mask = ~_mm_movemask_epi8(a: result);
642 if (mask)
643 return difference(mask, l - width);
644 } else {
 // fewer than 4 elements: plain element-by-element comparison. The
 // same lambda serves as the loop check (a non-zero difference ends
 // the loop) and as the provider of the return value.
645 // reset l
646 l &= 3;
647
648 const auto lambda = [=](size_t i) -> int {
649 return a[i] - b[i];
650 };
651 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
652 }
653 return 0;
654}
655#endif
656
657Q_NEVER_INLINE
658qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
659{
660#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
661 return qustrlen_sse2(str);
662#endif
663
664 if (sizeof(wchar_t) == sizeof(char16_t))
665 return wcslen(s: reinterpret_cast<const wchar_t *>(str));
666
667 qsizetype result = 0;
668 while (*str++)
669 ++result;
670 return result;
671}
672
673qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
674{
675 return qustrchr(str: { str, maxlen }, ch: u'\0') - str;
676}
677
678/*!
679 * \internal
680 *
681 * Searches for character \a c in the string \a str and returns a pointer to
682 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
683 * character is not found, this function returns a pointer to the end of the
684 * string -- that is, \c{str.end()}.
685 */
686Q_NEVER_INLINE
687const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
688{
689 const char16_t *n = str.utf16();
690 const char16_t *e = n + str.size();
691
692#ifdef __SSE2__
 // loops tells the 16-byte loop below whether it may run more than once;
 // the AVX2 code clears it because fewer than 32 bytes remain afterwards
693 bool loops = true;
694 // Using the PMOVMSKB instruction, we get two bits for each character
695 // we compare.
696 __m128i mch;
697 if constexpr (UseAvx2) {
698 // we're going to read n[0..15] (32 bytes)
699 __m256i mch256 = _mm256_set1_epi32(i: c | (c << 16));
700 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
701 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(n));
702 __m256i result = _mm256_cmpeq_epi16(a: data, b: mch256);
703 uint mask = uint(_mm256_movemask_epi8(a: result));
704 if (mask) {
705 uint idx = qCountTrailingZeroBits(v: mask);
706 return n + idx / 2;
707 }
708 }
709 loops = false;
710 mch = _mm256_castsi256_si128(a: mch256);
711 } else {
712 mch = _mm_set1_epi32(i: c | (c << 16));
713 }
714
 // Compares data against the broadcast character. validityMask selects
 // the mask bits that correspond to bytes that were actually loaded. On a
 // match, n is advanced to the matching character and true is returned.
715 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
716 __m128i result = _mm_cmpeq_epi16(a: data, b: mch);
717 uint mask = uint(_mm_movemask_epi8(a: result));
718 if ((mask & validityMask) == 0)
719 return false;
720 uint idx = qCountTrailingZeroBits(v: mask);
721 n += idx / 2;
722 return true;
723 };
724
725 // we're going to read n[0..7] (16 bytes)
726 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
727 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(n));
728 if (hasMatch(data, 0xffff))
729 return n;
730
731 if (!loops) {
 // leaving through break skips the loop's own "n = next" step,
 // so advance n by hand
732 n += 8;
733 break;
734 }
735 }
736
737# if !defined(__OPTIMIZE_SIZE__)
738 // we're going to read n[0..3] (8 bytes)
739 if (e - n > 3) {
740 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(n));
741 if (hasMatch(data, 0xff))
742 return n;
743
744 n += 4;
745 }
746
 // at most 3 characters are left: check them with an unrolled loop
747 return UnrollTailLoop<3>::exec(count: e - n, returnIfExited: e,
748 loopCheck: [=](qsizetype i) { return n[i] == c; },
749 returnIfFailed: [=](qsizetype i) { return n + i; });
750# endif
751#elif defined(__ARM_NEON__)
 // each lane that matches contributes its own bit (1 << lane index), so
 // the horizontal sum is a bitmask with one bit per character
752 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
753 const uint16x8_t ch_vec = vdupq_n_u16(c);
754 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
755 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
756 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
757 if (ushort(mask)) {
758 // found a match
759 return n + qCountTrailingZeroBits(mask);
760 }
761 }
762#endif // aarch64
763
 // generic code, also used for whatever the vectorized code above left over
764 return std::find(first: n, last: e, val: c);
765}
766
767/*!
768 * \internal
769 *
770 * Searches case-insensitively for character \a c in the string \a str and
771 * returns a pointer to it. Iif the character is not found, this function
772 * returns a pointer to the end of the string -- that is, \c{str.end()}.
773 */
774Q_NEVER_INLINE
775const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
776{
777 const QChar *n = str.begin();
778 const QChar *e = str.end();
779 c = foldCase(ch: c);
780 auto it = std::find_if(first: n, last: e, pred: [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
781 return reinterpret_cast<const char16_t *>(it);
782}
783
784// Note: ptr on output may be off by one and point to a preceding US-ASCII
785// character. Usually harmless.
786bool qt_is_ascii(const char *&ptr, const char *end) noexcept
787{
788#if defined(__SSE2__)
789 // Testing for the high bit can be done efficiently with just PMOVMSKB
790 bool loops = true;
791 if constexpr (UseAvx2) {
792 while (ptr + 32 <= end) {
793 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(ptr));
794 quint32 mask = _mm256_movemask_epi8(a: data);
795 if (mask) {
796 uint idx = qCountTrailingZeroBits(v: mask);
797 ptr += idx;
798 return false;
799 }
800 ptr += 32;
801 }
802 loops = false;
803 }
804
805 while (ptr + 16 <= end) {
806 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
807 quint32 mask = _mm_movemask_epi8(a: data);
808 if (mask) {
809 uint idx = qCountTrailingZeroBits(v: mask);
810 ptr += idx;
811 return false;
812 }
813 ptr += 16;
814
815 if (!loops)
816 break;
817 }
818 if (ptr + 8 <= end) {
819 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
820 quint8 mask = _mm_movemask_epi8(a: data);
821 if (mask) {
822 uint idx = qCountTrailingZeroBits(v: mask);
823 ptr += idx;
824 return false;
825 }
826 ptr += 8;
827 }
828#endif
829
830 while (ptr + 4 <= end) {
831 quint32 data = qFromUnaligned<quint32>(src: ptr);
832 if (data &= 0x80808080U) {
833 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
834 ? qCountLeadingZeroBits(v: data)
835 : qCountTrailingZeroBits(v: data);
836 ptr += idx / 8;
837 return false;
838 }
839 ptr += 4;
840 }
841
842 while (ptr != end) {
843 if (quint8(*ptr) & 0x80)
844 return false;
845 ++ptr;
846 }
847 return true;
848}
849
850bool QtPrivate::isAscii(QLatin1StringView s) noexcept
851{
852 const char *ptr = s.begin();
853 const char *end = s.end();
854
855 return qt_is_ascii(ptr, end);
856}
857
858static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
859{
860#ifdef __SSE2__
861 const char *ptr8 = reinterpret_cast<const char *>(ptr);
862 const char *end8 = reinterpret_cast<const char *>(end);
863 bool ok = simdTestMask(ptr&: ptr8, end: end8, maskval: 0xff80ff80);
864 ptr = reinterpret_cast<const char16_t *>(ptr8);
865 if (!ok)
866 return false;
867#endif
868
869 while (ptr != end) {
870 if (*ptr & 0xff80)
871 return false;
872 ++ptr;
873 }
874 return true;
875}
876
877bool QtPrivate::isAscii(QStringView s) noexcept
878{
879 const char16_t *ptr = s.utf16();
880 const char16_t *end = ptr + s.size();
881
882 return isAscii_helper(ptr, end);
883}
884
885bool QtPrivate::isLatin1(QStringView s) noexcept
886{
887 const char16_t *ptr = s.utf16();
888 const char16_t *end = ptr + s.size();
889
890#ifdef __SSE2__
891 const char *ptr8 = reinterpret_cast<const char *>(ptr);
892 const char *end8 = reinterpret_cast<const char *>(end);
893 if (!simdTestMask(ptr&: ptr8, end: end8, maskval: 0xff00ff00))
894 return false;
895 ptr = reinterpret_cast<const char16_t *>(ptr8);
896#endif
897
898 while (ptr != end) {
899 if (*ptr++ > 0xff)
900 return false;
901 }
902 return true;
903}
904
905bool QtPrivate::isValidUtf16(QStringView s) noexcept
906{
907 constexpr char32_t InvalidCodePoint = UINT_MAX;
908
909 QStringIterator i(s);
910 while (i.hasNext()) {
911 const char32_t c = i.next(invalidAs: InvalidCodePoint);
912 if (c == InvalidCodePoint)
913 return false;
914 }
915
916 return true;
917}
918
919// conversion between Latin 1 and UTF-16
// Converts the \a size Latin-1 bytes starting at \a str to UTF-16 by
// zero-extending each byte, and writes the \a size resulting code units to
// \a dst.
920Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
921{
922 /* SIMD:
923 * Unpacking with SSE has been shown to improve performance on recent CPUs
924 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
925 * itself in exactly the same way as one would do it with intrinsics.
926 */
927#if defined(__SSE2__)
928 // we're going to read str[offset..offset+15] (16 bytes)
929 const __m128i nullMask = _mm_setzero_si128();
 // converts 16 input bytes at str + offset into 16 code units at dst + offset
930 auto processOneChunk = [=](qptrdiff offset) {
931 const __m128i chunk = _mm_loadu_si128(p: (const __m128i*)(str + offset)); // load
932 if constexpr (UseAvx2) {
933 // zero extend to an YMM register
934 const __m256i extended = _mm256_cvtepu8_epi16(V: chunk);
935
936 // store
937 _mm256_storeu_si256(p: (__m256i*)(dst + offset), a: extended);
938 } else {
939 // unpack the first 8 bytes, padding with zeros
940 const __m128i firstHalf = _mm_unpacklo_epi8(a: chunk, b: nullMask);
941 _mm_storeu_si128(p: (__m128i*)(dst + offset), b: firstHalf); // store
942
943 // unpack the last 8 bytes, padding with zeros
944 const __m128i secondHalf = _mm_unpackhi_epi8 (a: chunk, b: nullMask);
945 _mm_storeu_si128(p: (__m128i*)(dst + offset + 8), b: secondHalf); // store
946 }
947 };
948
949 const char *e = str + size;
950 if (size >= sizeof(__m128i)) {
951 qptrdiff offset = 0;
952 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
953 processOneChunk(offset);
 // leftover shorter than a chunk: convert the last 16 bytes of the
 // input again, overlapping output that has already been written
954 if (str + offset < e)
955 processOneChunk(size - sizeof(__m128i));
956 return;
957 }
958
959# if !defined(__OPTIMIZE_SIZE__)
960 if (size >= 4) {
961 // two overlapped loads & stores, of either 64-bit or of 32-bit
962 if (size >= 8) {
963 const __m128i unpacked1 = mm_load8_zero_extend(ptr: str);
964 const __m128i unpacked2 = mm_load8_zero_extend(ptr: str + size - 8);
965 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst), b: unpacked1);
966 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + size - 8), b: unpacked2);
967 } else {
968 const __m128i chunk1 = _mm_cvtsi32_si128(a: qFromUnaligned<quint32>(src: str));
969 const __m128i chunk2 = _mm_cvtsi32_si128(a: qFromUnaligned<quint32>(src: str + size - 4));
970 const __m128i unpacked1 = _mm_unpacklo_epi8(a: chunk1, b: nullMask);
971 const __m128i unpacked2 = _mm_unpacklo_epi8(a: chunk2, b: nullMask);
972 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst), a: unpacked1);
973 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst + size - 4), a: unpacked2);
974 }
975 return;
976 } else {
 // 0 to 3 bytes: unrolled scalar conversion
977 size = size % 4;
978 return UnrollTailLoop<3>::exec(count: qsizetype(size), code: [=](qsizetype i) { dst[i] = uchar(str[i]); });
979 }
980# endif
981#endif
 // Reached when there is no SSE2 code, or when the SSE2 code was built with
 // __OPTIMIZE_SIZE__ and the input is shorter than 16 bytes.
982#if defined(__mips_dsp)
983 static_assert(sizeof(qsizetype) == sizeof(int),
984 "oops, the assembler implementation needs to be called in a loop");
985 if (size > 20)
986 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
987 else
988 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
989#else
 // generic code: the cast to uchar keeps bytes of 0x80 and above from
 // being sign-extended
990 while (size--)
991 *dst++ = (uchar)*str++;
992#endif
993}
994
995static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
996{
997 const qsizetype len = str.size();
998 QVarLengthArray<char16_t> arr(len);
999 qt_from_latin1(dst: arr.data(), str: str.data(), size: len);
1000 return arr;
1001}
1002
1003template <bool Checked>
1004static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1005{
1006#if defined(__SSE2__)
1007 auto questionMark256 = []() {
1008 if constexpr (UseAvx2)
1009 return _mm256_broadcastw_epi16(X: _mm_cvtsi32_si128(a: '?'));
1010 else
1011 return 0;
1012 }();
1013 auto outOfRange256 = []() {
1014 if constexpr (UseAvx2)
1015 return _mm256_broadcastw_epi16(X: _mm_cvtsi32_si128(a: 0x100));
1016 else
1017 return 0;
1018 }();
1019 __m128i questionMark, outOfRange;
1020 if constexpr (UseAvx2) {
1021 questionMark = _mm256_castsi256_si128(questionMark256);
1022 outOfRange = _mm256_castsi256_si128(outOfRange256);
1023 } else {
1024 questionMark = _mm_set1_epi16(w: '?');
1025 outOfRange = _mm_set1_epi16(w: 0x100);
1026 }
1027
1028 auto mergeQuestionMarks = [=](__m128i chunk) {
1029 if (!Checked)
1030 return chunk;
1031
1032 // SSE has no compare instruction for unsigned comparison.
1033 if constexpr (UseSse4_1) {
1034 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1035 chunk = _mm_min_epu16(V1: chunk, V2: outOfRange);
1036 const __m128i offLimitMask = _mm_cmpeq_epi16(a: chunk, b: outOfRange);
1037 chunk = _mm_blendv_epi8(V1: chunk, V2: questionMark, M: offLimitMask);
1038 return chunk;
1039 }
1040 // The variables must be shiffted + 0x8000 to be compared
1041 const __m128i signedBitOffset = _mm_set1_epi16(w: short(0x8000));
1042 const __m128i thresholdMask = _mm_set1_epi16(w: short(0xff + 0x8000));
1043
1044 const __m128i signedChunk = _mm_add_epi16(a: chunk, b: signedBitOffset);
1045 const __m128i offLimitMask = _mm_cmpgt_epi16(a: signedChunk, b: thresholdMask);
1046
1047 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1048 // the 16 bits that were correct contains zeros
1049 const __m128i offLimitQuestionMark = _mm_and_si128(a: offLimitMask, b: questionMark);
1050
1051 // correctBytes contains the bytes that were in limit
1052 // the 16 bits that were off limits contains zeros
1053 const __m128i correctBytes = _mm_andnot_si128(a: offLimitMask, b: chunk);
1054
1055 // merge offLimitQuestionMark and correctBytes to have the result
1056 chunk = _mm_or_si128(a: correctBytes, b: offLimitQuestionMark);
1057
1058 Q_UNUSED(outOfRange);
1059 return chunk;
1060 };
1061
1062 // we're going to read to src[offset..offset+15] (16 bytes)
1063 auto loadChunkAt = [=](qptrdiff offset) {
1064 __m128i chunk1, chunk2;
1065 if constexpr (UseAvx2) {
1066 __m256i chunk = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src + offset));
1067 if (Checked) {
1068 // See mergeQuestionMarks lambda above for details
1069 chunk = _mm256_min_epu16(chunk, outOfRange256);
1070 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1071 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1072 }
1073
1074 chunk2 = _mm256_extracti128_si256(chunk, 1);
1075 chunk1 = _mm256_castsi256_si128(a: chunk);
1076 } else {
1077 chunk1 = _mm_loadu_si128(p: (const __m128i*)(src + offset)); // load
1078 chunk1 = mergeQuestionMarks(chunk1);
1079
1080 chunk2 = _mm_loadu_si128(p: (const __m128i*)(src + offset + 8)); // load
1081 chunk2 = mergeQuestionMarks(chunk2);
1082 }
1083
1084 // pack the two vector to 16 x 8bits elements
1085 return _mm_packus_epi16(a: chunk1, b: chunk2);
1086 };
1087
1088 if (size_t(length) >= sizeof(__m128i)) {
1089 // because of possible overlapping, we won't process the last chunk in the loop
1090 qptrdiff offset = 0;
1091 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1092 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1093
1094 // overlapped conversion of the last full chunk and the tail
1095 __m128i last1 = loadChunkAt(offset);
1096 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1097 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + offset), b: last1);
1098 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), b: last2);
1099 return;
1100 }
1101
1102# if !defined(__OPTIMIZE_SIZE__)
1103 if (length >= 4) {
1104 // this code is fine even for in-place conversion because we load both
1105 // before any store
1106 if (length >= 8) {
1107 __m128i chunk1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src));
1108 __m128i chunk2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src + length - 8));
1109 chunk1 = mergeQuestionMarks(chunk1);
1110 chunk2 = mergeQuestionMarks(chunk2);
1111
1112 // pack, where the upper half is ignored
1113 const __m128i result1 = _mm_packus_epi16(a: chunk1, b: chunk1);
1114 const __m128i result2 = _mm_packus_epi16(a: chunk2, b: chunk2);
1115 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst), a: result1);
1116 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst + length - 8), a: result2);
1117 } else {
1118 __m128i chunk1 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(src));
1119 __m128i chunk2 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(src + length - 4));
1120 chunk1 = mergeQuestionMarks(chunk1);
1121 chunk2 = mergeQuestionMarks(chunk2);
1122
1123 // pack, we'll zero the upper three quarters
1124 const __m128i result1 = _mm_packus_epi16(a: chunk1, b: chunk1);
1125 const __m128i result2 = _mm_packus_epi16(a: chunk2, b: chunk2);
1126 qToUnaligned(src: _mm_cvtsi128_si32(a: result1), dest: dst);
1127 qToUnaligned(src: _mm_cvtsi128_si32(a: result2), dest: dst + length - 4);
1128 }
1129 return;
1130 }
1131
1132 length = length % 4;
1133 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1134 if (Checked)
1135 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1136 else
1137 dst[i] = src[i];
1138 });
1139# else
1140 length = length % 16;
1141# endif // optimize size
1142#elif defined(__ARM_NEON__)
1143 // Refer to the documentation of the SSE2 implementation.
1144 // This uses exactly the same method as for SSE except:
1145 // 1) neon has unsigned comparison
1146 // 2) packing is done to 64 bits (8 x 8bits component).
1147 if (length >= 16) {
1148 const qsizetype chunkCount = length >> 3; // divided by 8
1149 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1150 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1151 for (qsizetype i = 0; i < chunkCount; ++i) {
1152 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1153 src += 8;
1154
1155 if (Checked) {
1156 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1157 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1158 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1159 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1160 }
1161 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1162 vst1_u8(dst, result); // store
1163 dst += 8;
1164 }
1165 length = length % 8;
1166 }
1167#endif
1168#if defined(__mips_dsp)
1169 static_assert(sizeof(qsizetype) == sizeof(int),
1170 "oops, the assembler implementation needs to be called in a loop");
1171 qt_toLatin1_mips_dsp_asm(dst, src, length);
1172#else
1173 while (length--) {
1174 if (Checked)
1175 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1176 else
1177 *dst++ = *src;
1178 ++src;
1179 }
1180#endif
1181}
1182
1183void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1184{
1185 qt_to_latin1_internal<true>(dst, src, length);
1186}
1187
1188void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1189{
1190 qt_to_latin1_internal<false>(dst, src, length);
1191}
1192
1193// Unicode case-insensitive comparison (argument order matches QStringView)
1194Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1195{
1196 if (a == b)
1197 return qt_lencmp(lhs: alen, rhs: blen);
1198
1199 char32_t alast = 0;
1200 char32_t blast = 0;
1201 qsizetype l = qMin(a: alen, b: blen);
1202 qsizetype i;
1203 for (i = 0; i < l; ++i) {
1204// qDebug() << Qt::hex << alast << blast;
1205// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1206// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1207 int diff = foldCase(ch: a[i], last&: alast) - foldCase(ch: b[i], last&: blast);
1208 if ((diff))
1209 return diff;
1210 }
1211 if (i == alen) {
1212 if (i == blen)
1213 return 0;
1214 return -1;
1215 }
1216 return 1;
1217}
1218
1219// Case-insensitive comparison between a QStringView and a QLatin1StringView
1220// (argument order matches those types)
1221Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1222{
1223 qsizetype l = qMin(a: alen, b: blen);
1224 qsizetype i;
1225 for (i = 0; i < l; ++i) {
1226 int diff = foldCase(ch: a[i]) - foldCase(ch: char16_t{uchar(b[i])});
1227 if ((diff))
1228 return diff;
1229 }
1230 if (i == alen) {
1231 if (i == blen)
1232 return 0;
1233 return -1;
1234 }
1235 return 1;
1236}
1237
1238// Case-insensitive comparison between a Unicode string and a UTF-8 string
1239Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1240{
1241 auto src1 = reinterpret_cast<const uchar *>(utf8);
1242 auto end1 = reinterpret_cast<const uchar *>(utf8end);
1243 QStringIterator src2(utf16, utf16end);
1244
1245 while (src1 < end1 && src2.hasNext()) {
1246 char32_t uc1 = 0;
1247 char32_t *output = &uc1;
1248 uchar b = *src1++;
1249 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst&: output, src&: src1, end: end1);
1250 if (res < 0) {
1251 // decoding error
1252 uc1 = QChar::ReplacementCharacter;
1253 } else {
1254 uc1 = QChar::toCaseFolded(ucs4: uc1);
1255 }
1256
1257 char32_t uc2 = QChar::toCaseFolded(ucs4: src2.next());
1258 int diff = uc1 - uc2; // can't underflow
1259 if (diff)
1260 return diff;
1261 }
1262
1263 // the shorter string sorts first
1264 return (end1 > src1) - int(src2.hasNext());
1265}
1266
1267#if defined(__mips_dsp)
1268// From qstring_mips_dsp_asm.S
1269extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
1270 const char16_t *b,
1271 unsigned len);
1272#endif
1273
1274// Unicode case-sensitive compare two same-sized strings
1275template <StringComparisonMode Mode>
1276static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1277{
1278 // This function isn't memcmp() because that can return the wrong sorting
1279 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1280 // but the bytes in memory are FF 00 and 00 01.
1281
1282#ifndef __OPTIMIZE_SIZE__
1283# if defined(__mips_dsp)
1284 static_assert(sizeof(uint) == sizeof(size_t));
1285 if (l >= 8) {
1286 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1287 }
1288# elif defined(__SSE2__)
1289 return ucstrncmp_sse2<Mode>(a, b, l);
1290# elif defined(__ARM_NEON__)
1291 if (l >= 8) {
1292 const char16_t *end = a + l;
1293 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1294 while (end - a > 7) {
1295 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1296 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1297
1298 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1299 if (r) {
1300 // found a different QChar
1301 if (Mode == CompareStringsForEquality)
1302 return 1;
1303 uint idx = qCountTrailingZeroBits(r);
1304 return a[idx] - b[idx];
1305 }
1306 a += 8;
1307 b += 8;
1308 }
1309 l &= 7;
1310 }
1311 const auto lambda = [=](size_t i) -> int {
1312 return a[i] - b[i];
1313 };
1314 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1315# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1316#endif // __OPTIMIZE_SIZE__
1317
1318 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1319 return memcmp(s1: a, s2: b, n: l * sizeof(char16_t));
1320
1321 for (size_t i = 0; i < l; ++i) {
1322 if (int diff = a[i] - b[i])
1323 return diff;
1324 }
1325 return 0;
1326}
1327
1328template <StringComparisonMode Mode>
1329static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1330{
1331 const uchar *c = reinterpret_cast<const uchar *>(b);
1332 const char16_t *uc = a;
1333 const char16_t *e = uc + l;
1334
1335#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1336 return ucstrncmp_sse2<Mode>(uc, c, l);
1337#endif
1338
1339 while (uc < e) {
1340 int diff = *uc - *c;
1341 if (diff)
1342 return diff;
1343 uc++, c++;
1344 }
1345
1346 return 0;
1347}
1348
1349// Unicode case-sensitive equality
1350template <typename Char2>
1351static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1352{
1353 if constexpr (std::is_same_v<decltype(a), decltype(b)>) {
1354 if (a == b)
1355 return true;
1356 }
1357 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1358}
1359
1360// Unicode case-sensitive comparison
1361template <typename Char2>
1362static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1363{
1364 if constexpr (std::is_same_v<decltype(a), decltype(b)>) {
1365 if (a == b && alen == blen)
1366 return 0;
1367 }
1368 const size_t l = qMin(a: alen, b: blen);
1369 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1370 return cmp ? cmp : qt_lencmp(lhs: alen, rhs: blen);
1371}
1372
1373using CaseInsensitiveL1 = QtPrivate::QCaseInsensitiveLatin1Hash;
1374
1375static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1376{
1377 // We're called with QLatin1StringView's .data() and .size():
1378 Q_ASSERT(lSize >= 0 && rSize >= 0);
1379 if (!lSize)
1380 return rSize ? -1 : 0;
1381 if (!rSize)
1382 return 1;
1383 const qsizetype size = std::min(a: lSize, b: rSize);
1384
1385 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1386 for (qsizetype i = 0; i < size; i++) {
1387 if (int res = CaseInsensitiveL1::difference(lhs: lhsChar[i], rhs: rhsChar[i]))
1388 return res;
1389 }
1390 return qt_lencmp(lhs: lSize, rhs: rSize);
1391}
1392
1393bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1394{
1395 Q_ASSERT(lhs.size() == rhs.size());
1396 return ucstreq(a: lhs.utf16(), alen: lhs.size(), b: rhs.utf16());
1397}
1398
1399bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1400{
1401 Q_ASSERT(lhs.size() == rhs.size());
1402 return ucstreq(a: lhs.utf16(), alen: lhs.size(), b: rhs.latin1());
1403}
1404
1405bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1406{
1407 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1408}
1409
1410bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1411{
1412 Q_ASSERT(lhs.size() == rhs.size());
1413 return (!lhs.size() || memcmp(s1: lhs.data(), s2: rhs.data(), n: lhs.size()) == 0);
1414}
1415
1416bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1417{
1418 return QUtf8::compareUtf8(utf8: lhs, utf16: rhs) == 0;
1419}
1420
1421bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1422{
1423 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1424}
1425
1426bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1427{
1428 return QUtf8::compareUtf8(utf8: QByteArrayView(rhs), s: lhs) == 0;
1429}
1430
1431bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1432{
1433 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1434}
1435
1436bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1437{
1438#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1439 Q_ASSERT(lhs.size() == rhs.size());
1440#else
1441 // operator== didn't enforce size prior to Qt 6.2
1442 if (lhs.size() != rhs.size())
1443 return false;
1444#endif
1445 return (!lhs.size() || memcmp(s1: lhs.data(), s2: rhs.data(), n: lhs.size()) == 0);
1446}
1447
1448bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1449{
1450 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1451 return false;
1452 return lhs.visit(v: [rhs](auto lhs) {
1453 return rhs.visit([lhs](auto rhs) {
1454 return QtPrivate::equalStrings(lhs, rhs);
1455 });
1456 });
1457}
1458
1459/*!
1460 \relates QStringView
1461 \internal
1462 \since 5.10
1463
1464 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1465
1466 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1467
1468 Case-sensitive comparison is based exclusively on the numeric Unicode values
1469 of the characters and is very fast, but is not what a human would expect.
1470 Consider sorting user-visible strings with QString::localeAwareCompare().
1471
1472 \sa {Comparing Strings}
1473*/
1474int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1475{
1476 if (cs == Qt::CaseSensitive)
1477 return ucstrcmp(a: lhs.utf16(), alen: lhs.size(), b: rhs.utf16(), blen: rhs.size());
1478 return ucstricmp(alen: lhs.size(), a: lhs.utf16(), blen: rhs.size(), b: rhs.utf16());
1479}
1480
1481/*!
1482 \relates QStringView
1483 \internal
1484 \since 5.10
1485 \overload
1486
1487 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1488
1489 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1490
1491 Case-sensitive comparison is based exclusively on the numeric Unicode values
1492 of the characters and is very fast, but is not what a human would expect.
1493 Consider sorting user-visible strings with QString::localeAwareCompare().
1494
1495 \sa {Comparing Strings}
1496*/
1497int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1498{
1499 if (cs == Qt::CaseSensitive)
1500 return ucstrcmp(a: lhs.utf16(), alen: lhs.size(), b: rhs.latin1(), blen: rhs.size());
1501 return ucstricmp(alen: lhs.size(), a: lhs.utf16(), blen: rhs.size(), b: rhs.latin1());
1502}
1503
1504/*!
1505 \relates QStringView
1506 \internal
1507 \since 6.0
1508 \overload
1509*/
1510int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1511{
1512 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1513}
1514
1515/*!
1516 \relates QStringView
1517 \internal
1518 \since 5.10
1519 \overload
1520*/
1521int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1522{
1523 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1524}
1525
1526/*!
1527 \relates QStringView
1528 \internal
1529 \since 5.10
1530 \overload
1531
1532 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1533
1534 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1535
1536 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1537 of the characters and is very fast, but is not what a human would expect.
1538 Consider sorting user-visible strings with QString::localeAwareCompare().
1539
1540 \sa {Comparing Strings}
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 if (lhs.isEmpty())
1545 return qt_lencmp(lhs: qsizetype(0), rhs: rhs.size());
1546 if (rhs.isEmpty())
1547 return qt_lencmp(lhs: lhs.size(), rhs: qsizetype(0));
1548 if (cs == Qt::CaseInsensitive)
1549 return latin1nicmp(lhsChar: lhs.data(), lSize: lhs.size(), rhsChar: rhs.data(), rSize: rhs.size());
1550 const auto l = std::min(a: lhs.size(), b: rhs.size());
1551 int r = memcmp(s1: lhs.data(), s2: rhs.data(), n: l);
1552 return r ? r : qt_lencmp(lhs: lhs.size(), rhs: rhs.size());
1553}
1554
1555/*!
1556 \relates QStringView
1557 \internal
1558 \since 6.0
1559 \overload
1560*/
1561int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1562{
1563 return -QUtf8::compareUtf8(utf8: QByteArrayView(rhs), s: lhs, cs);
1564}
1565
1566/*!
1567 \relates QStringView
1568 \internal
1569 \since 6.0
1570 \overload
1571*/
1572int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1573{
1574 if (cs == Qt::CaseSensitive)
1575 return QUtf8::compareUtf8(utf8: lhs, utf16: rhs);
1576 return ucstricmp8(utf8: lhs.begin(), utf8end: lhs.end(), utf16: rhs.begin(), utf16end: rhs.end());
1577}
1578
1579/*!
1580 \relates QStringView
1581 \internal
1582 \since 6.0
1583 \overload
1584*/
1585int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1586{
1587 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1588}
1589
1590/*!
1591 \relates QStringView
1592 \internal
1593 \since 6.0
1594 \overload
1595*/
1596int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1597{
1598 return QUtf8::compareUtf8(lhs: QByteArrayView(lhs), rhs: QByteArrayView(rhs), cs);
1599}
1600
1601int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1602{
1603 return lhs.visit(v: [rhs, cs](auto lhs) {
1604 return rhs.visit([lhs, cs](auto rhs) {
1605 return QtPrivate::compareStrings(lhs, rhs, cs);
1606 });
1607 });
1608}
1609
1610// ### Qt 7: do not allow anything but ASCII digits
1611// in arg()'s replacements.
1612#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1613static bool supportUnicodeDigitValuesInArg()
1614{
1615 static const bool result = []() {
1616 static const char supportUnicodeDigitValuesEnvVar[]
1617 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1618
1619 if (qEnvironmentVariableIsSet(varName: supportUnicodeDigitValuesEnvVar))
1620 return qEnvironmentVariableIntValue(varName: supportUnicodeDigitValuesEnvVar) != 0;
1621
1622#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1623 return true;
1624#else
1625 return false;
1626#endif
1627 }();
1628
1629 return result;
1630}
1631#endif
1632
1633static int qArgDigitValue(QChar ch) noexcept
1634{
1635#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1636 if (supportUnicodeDigitValuesInArg())
1637 return ch.digitValue();
1638#endif
1639 if (ch >= u'0' && ch <= u'9')
1640 return int(ch.unicode() - u'0');
1641 return -1;
1642}
1643
1644#if QT_CONFIG(regularexpression)
1645Q_DECL_COLD_FUNCTION
1646void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where);
1647#endif
1648
1649/*!
1650 \macro QT_RESTRICTED_CAST_FROM_ASCII
1651 \relates QString
1652
1653 Disables most automatic conversions from source literals and 8-bit data
1654 to unicode QStrings, but allows the use of
    the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1656 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1657 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1658 but does not require user code to wrap character and string literals
1659 with QLatin1Char, QLatin1StringView or similar.
1660
1661 Using this macro together with source strings outside the 7-bit range,
1662 non-literals, or literals with embedded NUL characters is undefined.
1663
1664 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1665*/
1666
1667/*!
1668 \macro QT_NO_CAST_FROM_ASCII
1669 \relates QString
1670 \relates QChar
1671
1672 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1673 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1674 \c{unsigned char}) to QChar.
1675
1676 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1677 QT_NO_CAST_FROM_BYTEARRAY
1678*/
1679
1680/*!
1681 \macro QT_NO_CAST_TO_ASCII
1682 \relates QString
1683
1684 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1685
1686 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1687 QT_NO_CAST_FROM_BYTEARRAY
1688*/
1689
1690/*!
1691 \macro QT_ASCII_CAST_WARNINGS
1692 \internal
1693 \relates QString
1694
1695 This macro can be defined to force a warning whenever a function is
1696 called that automatically converts between unicode and 8-bit encodings.
1697
1698 Note: This only works for compilers that support warnings for
1699 deprecated API.
1700
1701 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1702*/
1703
1704/*!
1705 \class QString
1706 \inmodule QtCore
1707 \reentrant
1708
1709 \brief The QString class provides a Unicode character string.
1710
1711 \ingroup tools
1712 \ingroup shared
1713 \ingroup string-processing
1714
1715 \compares strong
1716 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1717 QStringView QUtf8StringView
1718 \endcompareswith
1719 \compareswith strong QByteArray QByteArrayView {const char *}
1720 When comparing with byte arrays, their content is interpreted as UTF-8.
1721 \endcompareswith
1722
1723 QString stores a string of 16-bit \l{QChar}s, where each QChar
1724 corresponds to one UTF-16 code unit. (Unicode characters
1725 with code values above 65535 are stored using surrogate pairs,
1726 that is, two consecutive \l{QChar}s.)
1727
1728 \l{Unicode} is an international standard that supports most of the
1729 writing systems in use today. It is a superset of US-ASCII (ANSI
1730 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1731 characters are available at the same code positions.
1732
1733 Behind the scenes, QString uses \l{implicit sharing}
1734 (copy-on-write) to reduce memory usage and to avoid the needless
1735 copying of data. This also helps reduce the inherent overhead of
1736 storing 16-bit characters instead of 8-bit characters.
1737
1738 In addition to QString, Qt also provides the QByteArray class to
1739 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1740 For most purposes, QString is the class you want to use. It is
1741 used throughout the Qt API, and the Unicode support ensures that
1742 your applications are easy to translate if you want to expand
1743 your application's market at some point. Two prominent cases
1744 where QByteArray is appropriate are when you need to store raw
1745 binary data, and when memory conservation is critical (like in
1746 embedded systems).
1747
1748 \section1 Initializing a string
1749
1750 One way to initialize a QString is to pass a \c{const char
1751 *} to its constructor. For example, the following code creates a
1752 QString of size 5 containing the data "Hello":
1753
1754 \snippet qstring/main.cpp 0
1755
1756 QString converts the \c{const char *} data into Unicode using the
1757 fromUtf8() function.
1758
1759 In all of the QString functions that take \c{const char *}
1760 parameters, the \c{const char *} is interpreted as a classic
1761 C-style \c{'\\0'}-terminated string. Except where the function's
1762 name overtly indicates some other encoding, such \c{const char *}
1763 parameters are assumed to be encoded in UTF-8.
1764
1765 You can also provide string data as an array of \l{QChar}s:
1766
1767 \snippet qstring/main.cpp 1
1768
1769 QString makes a deep copy of the QChar data, so you can modify it
1770 later without experiencing side effects. You can avoid taking a
1771 deep copy of the character data by using QStringView or
1772 QString::fromRawData() instead.
1773
1774 Another approach is to set the size of the string using resize()
1775 and to initialize the data character per character. QString uses
1776 0-based indexes, just like C++ arrays. To access the character at
1777 a particular index position, you can use \l operator[](). On
1778 non-\c{const} strings, \l operator[]() returns a reference to a
1779 character that can be used on the left side of an assignment. For
1780 example:
1781
1782 \snippet qstring/main.cpp 2
1783
1784 For read-only access, an alternative syntax is to use the at()
1785 function:
1786
1787 \snippet qstring/main.cpp 3
1788
1789 The at() function can be faster than \l operator[]() because it
1790 never causes a \l{deep copy} to occur. Alternatively, use the
1791 first(), last(), or sliced() functions to extract several characters
1792 at a time.
1793
1794 A QString can embed '\\0' characters (QChar::Null). The size()
1795 function always returns the size of the whole string, including
1796 embedded '\\0' characters.
1797
1798 After a call to the resize() function, newly allocated characters
1799 have undefined values. To set all the characters in the string to
1800 a particular value, use the fill() function.
1801
1802 QString provides dozens of overloads designed to simplify string
1803 usage. For example, if you want to compare a QString with a string
1804 literal, you can write code like this and it will work as expected:
1805
1806 \snippet qstring/main.cpp 4
1807
1808 You can also pass string literals to functions that take QStrings
1809 as arguments, invoking the QString(const char *)
1810 constructor. Similarly, you can pass a QString to a function that
1811 takes a \c{const char *} argument using the \l qPrintable() macro,
1812 which returns the given QString as a \c{const char *}. This is
1813 equivalent to calling <QString>.toLocal8Bit().constData().
1814
1815 \section1 Manipulating string data
1816
1817 QString provides the following basic functions for modifying the
1818 character data: append(), prepend(), insert(), replace(), and
1819 remove(). For example:
1820
1821 \snippet qstring/main.cpp 5
1822
1823 In the above example, the replace() function's first two arguments are the
1824 position from which to start replacing and the number of characters that
1825 should be replaced.
1826
1827 When data-modifying functions increase the size of the string,
1828 QString may reallocate the memory in which it holds its data. When
1829 this happens, QString expands by more than it immediately needs so as
1830 to have space for further expansion without reallocation until the size
1831 of the string has significantly increased.
1832
1833 The insert(), remove(), and, when replacing a sub-string with one of
1834 different size, replace() functions can be slow (\l{linear time}) for
1835 large strings because they require moving many characters in the string
1836 by at least one position in memory.
1837
1838 If you are building a QString gradually and know in advance
1839 approximately how many characters the QString will contain, you
1840 can call reserve(), asking QString to preallocate a certain amount
1841 of memory. You can also call capacity() to find out how much
1842 memory the QString actually has allocated.
1843
1844 QString provides \l{STL-style iterators} (QString::const_iterator and
1845 QString::iterator). In practice, iterators are handy when working with
1846 generic algorithms provided by the C++ standard library.
1847
1848 \note Iterators over a QString, and references to individual characters
1849 within one, cannot be relied on to remain valid when any non-\c{const}
1850 method of the QString is called. Accessing such an iterator or reference
1851 after the call to a non-\c{const} method leads to undefined behavior. When
1852 stability for iterator-like functionality is required, you should use
1853 indexes instead of iterators, as they are not tied to QString's internal
1854 state and thus do not get invalidated.
1855
1856 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1857 function used on a given QString may cause it to internally perform a deep
1858 copy of its data. This invalidates all iterators over the string and
1859 references to individual characters within it. Do not call non-const
1860 functions while keeping iterators. Accessing an iterator or reference
1861 after it has been invalidated leads to undefined behavior. See the
1862 \l{Implicit sharing iterator problem} section for more information.
1863
1864 A frequent requirement is to remove or simplify the spacing between
1865 visible characters in a string. The characters that make up that spacing
1866 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1867 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1868 To obtain a copy of a string leaving out any spacing from its start and end,
1869 use \l trimmed(). To also replace each sequence of spacing characters within
1870 the string with a simple space, \c{' '}, use \l simplified().
1871
1872 If you want to find all occurrences of a particular character or
1873 substring in a QString, use the indexOf() or lastIndexOf()
    functions. The former searches forward, the latter searches backward.
1875 Either can be told an index position from which to start their search.
1876 Each returns the index position of the character or substring if they
1877 find it; otherwise, they return -1. For example, here is a typical loop
1878 that finds all occurrences of a particular substring:
1879
1880 \snippet qstring/main.cpp 6
1881
1882 QString provides many functions for converting numbers into
1883 strings and strings into numbers. See the arg() functions, the
1884 setNum() functions, the number() static functions, and the
1885 toInt(), toDouble(), and similar functions.
1886
1887 To get an uppercase or lowercase version of a string, use toUpper() or
1888 toLower().
1889
1890 Lists of strings are handled by the QStringList class. You can
1891 split a string into a list of strings using the split() function,
1892 and join a list of strings into a single string with an optional
1893 separator using QStringList::join(). You can obtain a filtered list
1894 from a string list by selecting the entries in it that contain a
1895 particular substring or match a particular QRegularExpression.
1896 See QStringList::filter() for details.
1897
1898 \section1 Querying string data
1899
1900 To see if a QString starts or ends with a particular substring, use
1901 startsWith() or endsWith(). To check whether a QString contains a
1902 specific character or substring, use the contains() function. To
1903 find out how many times a particular character or substring occurs
1904 in a string, use count().
1905
1906 To obtain a pointer to the actual character data, call data() or
1907 constData(). These functions return a pointer to the beginning of
1908 the QChar data. The pointer is guaranteed to remain valid until a
1909 non-\c{const} function is called on the QString.
1910
1911 \section2 Comparing strings
1912
1913 QStrings can be compared using overloaded operators such as \l
1914 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1915 and so on. The comparison is based exclusively on the lexicographical
1916 order of the two strings, seen as sequences of UTF-16 code units.
1917 It is very fast but is not what a human would expect; the
1918 QString::localeAwareCompare() function is usually a better choice for
1919 sorting user-interface strings, when such a comparison is available.
1920
1921 When Qt is linked with the ICU library (which it usually is), its
1922 locale-aware sorting is used. Otherwise, platform-specific solutions
1923 are used:
1924 \list
1925 \li On Windows, localeAwareCompare() uses the current user locale,
1926 as set in the \uicontrol{regional} and \uicontrol{language}
1927 options portion of \uicontrol{Control Panel}.
1928 \li On \macos and iOS, \l localeAwareCompare() compares according
1929 to the \uicontrol{Order for sorted lists} setting in the
1930 \uicontrol{International preferences} panel.
1931 \li On other Unix-like systems, the comparison falls back to the
1932 system library's \c strcoll().
1933 \endlist
1934
1935 \section1 Converting between encoded string data and QString
1936
1937 QString provides the following functions that return a
1938 \c{const char *} version of the string as QByteArray: toUtf8(),
1939 toLatin1(), and toLocal8Bit().
1940
1941 \list
1942 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1943 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1944 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1945 Unicode character set through multibyte sequences.
1946 \li toLocal8Bit() returns an 8-bit string using the system's local
1947 encoding. This is the same as toUtf8() on Unix systems.
1948 \endlist
1949
1950 To convert from one of these encodings, QString provides
1951 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1952 encodings are supported through the QStringEncoder and QStringDecoder
1953 classes.
1954
1955 As mentioned above, QString provides a lot of functions and
1956 operators that make it easy to interoperate with \c{const char *}
1957 strings. But this functionality is a double-edged sword: It makes
1958 QString more convenient to use if all strings are US-ASCII or
1959 Latin-1, but there is always the risk that an implicit conversion
1960 from or to \c{const char *} is done using the wrong 8-bit
1961 encoding. To minimize these risks, you can turn off these implicit
1962 conversions by defining some of the following preprocessor symbols:
1963
1964 \list
1965 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1966 C string literals and pointers to Unicode.
1967 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1968 from C characters and character arrays but disables automatic
1969 conversions from character pointers to Unicode.
1970 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1971 to C strings.
1972 \endlist
1973
1974 You then need to explicitly call fromUtf8(), fromLatin1(),
1975 or fromLocal8Bit() to construct a QString from an
1976 8-bit string, or use the lightweight QLatin1StringView class. For
1977 example:
1978
1979 \snippet code/src_corelib_text_qstring.cpp 1
1980
1981 Similarly, you must call toLatin1(), toUtf8(), or
1982 toLocal8Bit() explicitly to convert the QString to an 8-bit
1983 string.
1984
1985 \table 100 %
1986 \header
1987 \li Note for C Programmers
1988
1989 \row
1990 \li
1991 Due to C++'s type system and the fact that QString is
1992 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1993 other basic types. For example:
1994
1995 \snippet qstring/main.cpp 7
1996
1997 The \c result variable is a normal variable allocated on the
1998 stack. When \c return is called, and because we're returning by
1999 value, the copy constructor is called and a copy of the string is
2000 returned. No actual copying takes place thanks to the implicit
2001 sharing.
2002
2003 \endtable
2004
2005 \section1 Distinction between null and empty strings
2006
2007 For historical reasons, QString distinguishes between null
2008 and empty strings. A \e null string is a string that is
2009 initialized using QString's default constructor or by passing
2010 \nullptr to the constructor. An \e empty string is any
2011 string with size 0. A null string is always empty, but an empty
2012 string isn't necessarily null:
2013
2014 \snippet qstring/main.cpp 8
2015
2016 All functions except isNull() treat null strings the same as empty
2017 strings. For example, toUtf8().constData() returns a valid pointer
2018 (not \nullptr) to a '\\0' character for a null string. We
2019 recommend that you always use the isEmpty() function and avoid isNull().
2020
2021 \section1 Number formats
2022
2023 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2024 qualifier, and the base is ten (its default), the default locale is
2025 used. This can be set using \l{QLocale::setDefault()}. For more refined
2026 control of localized string representations of numbers, see
2027 QLocale::toString(). All other number formatting done by QString follows the
2028 C locale's representation of numbers.
2029
2030 When QString::arg() applies left-padding to numbers, the fill character
2031 \c{'0'} is treated specially. If the number is negative, its minus sign
2032 appears before the zero-padding. If the field is localized, the
2033 locale-appropriate zero character is used in place of \c{'0'}. For
2034 floating-point numbers, this special treatment only applies if the number is
2035 finite.
2036
2037 \section2 Floating-point formats
2038
2039 In member functions (for example, arg() and number()) that format floating-point
2040 numbers (\c float or \c double) as strings, the representation used can be
2041 controlled by a choice of \e format and \e precision, whose meanings are as
2042 for \l {QLocale::toString(double, char, int)}.
2043
2044 If the selected \e format includes an exponent, localized forms follow the
2045 locale's convention on digits in the exponent. For non-localized formatting,
2046 the exponent shows its sign and includes at least two digits, left-padding
2047 with zero if needed.
2048
2049 \section1 More efficient string construction
2050
2051 Many strings are known at compile time. The QString constructor from
2052 C++ string literals will copy the contents of the string,
2053 treating the contents as UTF-8. This requires memory allocation and
2054 re-encoding string data, operations that will happen at runtime.
2055 If the string data is known at compile time, you can use the QStringLiteral
2056 macro or similarly \c{operator""_s} to create QString's payload at compile
2057 time instead.
2058
2059 Using the QString \c{'+'} operator, it is easy to construct a
2060 complex string from multiple substrings. You will often write code
2061 like this:
2062
2063 \snippet qstring/stringbuilder.cpp 0
2064
2065 There is nothing wrong with either of these string constructions,
2066 but there are a few hidden inefficiencies:
2067
2068 First, repeated use of the \c{'+'} operator may lead to
2069 multiple memory allocations. When concatenating \e{n} substrings,
2070 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2071 memory allocator.
2072
2073 These allocations can be optimized by an internal class
2074 \c{QStringBuilder}. This class is marked
2075 internal and does not appear in the documentation, because you
2076 aren't meant to instantiate it in your code. Its use will be
2077 automatic, as described below. The class is found in
    \c {src/corelib/text/qstringbuilder.cpp} if you want to have a
2079 look at it.
2080
2081 \c{QStringBuilder} uses expression templates and reimplements the
2082 \c{'%'} operator so that when you use \c{'%'} for string
2083 concatenation instead of \c{'+'}, multiple substring
2084 concatenations will be postponed until the final result is about
2085 to be assigned to a QString. At this point, the amount of memory
2086 required for the final result is known. The memory allocator is
2087 then called \e{once} to get the required space, and the substrings
2088 are copied into it one by one.
2089
2090 Additional efficiency is gained by inlining and reducing reference
2091 counting (the QString created from a \c{QStringBuilder}
2092 has a ref count of 1, whereas QString::append() needs an extra
2093 test).
2094
2095 There are two ways you can access this improved method of string
2096 construction. The straightforward way is to include
2097 \c{QStringBuilder} wherever you want to use it and use the
2098 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2099
2100 \snippet qstring/stringbuilder.cpp 5
2101
2102 A more global approach, which is more convenient but not entirely
2103 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2104 it to the compiler flags) at build time. This will make concatenating
2105 strings with \c{'+'} work the same way as \c{QStringBuilder's} \c{'%'}.
2106
2107 \note Using automatic type deduction (for example, by using the \c
2108 auto keyword) with the result of string concatenation when QStringBuilder
2109 is enabled will show that the concatenation is indeed an object of a
2110 QStringBuilder specialization:
2111
2112 \snippet qstring/stringbuilder.cpp 6
2113
2114 This does not cause any harm, as QStringBuilder will implicitly convert to
2115 QString when required. If this is undesirable, then one should specify
2116 the necessary types instead of having the compiler deduce them:
2117
2118 \snippet qstring/stringbuilder.cpp 7
2119
2120 \section1 Maximum size and out-of-memory conditions
2121
2122 The maximum size of QString depends on the architecture. Most 64-bit
2123 systems can allocate more than 2 GB of memory, with a typical limit
2124 of 2^63 bytes. The actual value also depends on the overhead required for
2125 managing the data block. As a result, you can expect a maximum size
2126 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2127 on 64-bit platforms. The number of elements that can be stored in a
2128 QString is this maximum size divided by the size of QChar.
2129
2130 When memory allocation fails, QString throws a \c std::bad_alloc
2131 exception if the application was compiled with exception support.
2132 Out-of-memory conditions in Qt containers are the only cases where Qt
2133 will throw exceptions. If exceptions are disabled, then running out of
2134 memory is undefined behavior.
2135
2136 \note Target operating systems may impose limits on how much memory an
2137 application can allocate, in total, or on the size of individual allocations.
2138 This may further restrict the size of string a QString can hold.
2139 Mitigating or controlling the behavior these limits cause is beyond the
2140 scope of the Qt API.
2141
2142 \sa fromRawData(), QChar, QStringView, QLatin1StringView, QByteArray
2143*/
2144
2145/*! \typedef QString::ConstIterator
2146
2147 Qt-style synonym for QString::const_iterator.
2148*/
2149
2150/*! \typedef QString::Iterator
2151
2152 Qt-style synonym for QString::iterator.
2153*/
2154
2155/*! \typedef QString::const_iterator
2156
2157 \sa QString::iterator
2158*/
2159
2160/*! \typedef QString::iterator
2161
2162 \sa QString::const_iterator
2163*/
2164
2165/*! \typedef QString::const_reverse_iterator
2166 \since 5.6
2167
2168 \sa QString::reverse_iterator, QString::const_iterator
2169*/
2170
2171/*! \typedef QString::reverse_iterator
2172 \since 5.6
2173
2174 \sa QString::const_reverse_iterator, QString::iterator
2175*/
2176
2177/*!
2178 \typedef QString::size_type
2179*/
2180
2181/*!
2182 \typedef QString::difference_type
2183*/
2184
2185/*!
2186 \typedef QString::const_reference
2187*/
2188/*!
2189 \typedef QString::reference
2190*/
2191
2192/*!
2193 \typedef QString::const_pointer
2194
2195 The QString::const_pointer typedef provides an STL-style
2196 const pointer to a QString element (QChar).
2197*/
2198/*!
2199 \typedef QString::pointer
2200
2201 The QString::pointer typedef provides an STL-style
2202 pointer to a QString element (QChar).
2203*/
2204
2205/*!
2206 \typedef QString::value_type
2207*/
2208
2209/*! \fn QString::iterator QString::begin()
2210
2211 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2212 first character in the string.
2213
2214//! [iterator-invalidation-func-desc]
2215 \warning The returned iterator is invalidated on detachment or when the
2216 QString is modified.
2217//! [iterator-invalidation-func-desc]
2218
2219 \sa constBegin(), end()
2220*/
2221
2222/*! \fn QString::const_iterator QString::begin() const
2223
2224 \overload begin()
2225*/
2226
2227/*! \fn QString::const_iterator QString::cbegin() const
2228 \since 5.0
2229
2230 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2231 first character in the string.
2232
2233 \include qstring.cpp iterator-invalidation-func-desc
2234
2235 \sa begin(), cend()
2236*/
2237
2238/*! \fn QString::const_iterator QString::constBegin() const
2239
2240 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2241 first character in the string.
2242
2243 \include qstring.cpp iterator-invalidation-func-desc
2244
2245 \sa begin(), constEnd()
2246*/
2247
2248/*! \fn QString::iterator QString::end()
2249
2250 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2251 the last character in the string.
2252
2253 \include qstring.cpp iterator-invalidation-func-desc
2254
2255 \sa begin(), constEnd()
2256*/
2257
2258/*! \fn QString::const_iterator QString::end() const
2259
2260 \overload end()
2261*/
2262
2263/*! \fn QString::const_iterator QString::cend() const
2264 \since 5.0
2265
2266 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2267 after the last character in the string.
2268
2269 \include qstring.cpp iterator-invalidation-func-desc
2270
2271 \sa cbegin(), end()
2272*/
2273
2274/*! \fn QString::const_iterator QString::constEnd() const
2275
2276 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2277 after the last character in the string.
2278
2279 \include qstring.cpp iterator-invalidation-func-desc
2280
2281 \sa constBegin(), end()
2282*/
2283
2284/*! \fn QString::reverse_iterator QString::rbegin()
2285 \since 5.6
2286
2287 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2288 the first character in the string, in reverse order.
2289
2290 \include qstring.cpp iterator-invalidation-func-desc
2291
2292 \sa begin(), crbegin(), rend()
2293*/
2294
2295/*! \fn QString::const_reverse_iterator QString::rbegin() const
2296 \since 5.6
2297 \overload
2298*/
2299
2300/*! \fn QString::const_reverse_iterator QString::crbegin() const
2301 \since 5.6
2302
2303 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2304 pointing to the first character in the string, in reverse order.
2305
2306 \include qstring.cpp iterator-invalidation-func-desc
2307
2308 \sa begin(), rbegin(), rend()
2309*/
2310
2311/*! \fn QString::reverse_iterator QString::rend()
2312 \since 5.6
2313
2314 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2315 after the last character in the string, in reverse order.
2316
2317 \include qstring.cpp iterator-invalidation-func-desc
2318
2319 \sa end(), crend(), rbegin()
2320*/
2321
2322/*! \fn QString::const_reverse_iterator QString::rend() const
2323 \since 5.6
2324 \overload
2325*/
2326
2327/*! \fn QString::const_reverse_iterator QString::crend() const
2328 \since 5.6
2329
2330 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2331 pointing just after the last character in the string, in reverse order.
2332
2333 \include qstring.cpp iterator-invalidation-func-desc
2334
2335 \sa end(), rend(), rbegin()
2336*/
2337
2338/*!
2339 \fn QString::QString()
2340
2341 Constructs a null string. Null strings are also considered empty.
2342
2343 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2344*/
2345
2346/*!
2347 \fn QString::QString(QString &&other)
2348
2349 Move-constructs a QString instance, making it point at the same
2350 object that \a other was pointing to.
2351
2352 \since 5.2
2353*/
2354
2355/*! \fn QString::QString(const char *str)
2356
2357 Constructs a string initialized with the 8-bit string \a str. The
2358 given const char pointer is converted to Unicode using the
2359 fromUtf8() function.
2360
2361 You can disable this constructor by defining
2362 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2363 can be useful if you want to ensure that all user-visible strings
2364 go through QObject::tr(), for example.
2365
2366 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2367 this constructor, but enables a \c{QString(const char (&ch)[N])}
2368 constructor instead. Using non-literal input, or input with
2369 embedded NUL characters, or non-7-bit characters is undefined
2370 in this case.
2371
2372 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2373*/
2374
2375/*! \fn QString::QString(const char8_t *str)
2376
2377 Constructs a string initialized with the UTF-8 string \a str. The
2378 given const char8_t pointer is converted to Unicode using the
2379 fromUtf8() function.
2380
2381 \since 6.1
2382 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2383*/
2384
2385/*
2386//! [from-std-string]
2387Returns a copy of the \a str string. The given string is assumed to be
2388encoded in \1, and is converted to QString using the \2 function.
2389//! [from-std-string]
2390*/
2391
2392/*! \fn QString QString::fromStdString(const std::string &str)
2393
2394 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2395
2396 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2397*/
2398
2399/*! \fn QString QString::fromStdWString(const std::wstring &str)
2400
    Returns a copy of the \a str string. The given string is assumed
    to be encoded in UTF-16 if the size of wchar_t is 2 bytes (for example,
    on Windows) and in UCS-4 if the size of wchar_t is 4 bytes (most Unix
    systems).
2405
2406 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2407 fromStdU16String(), fromStdU32String()
2408*/
2409
2410/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2411 \since 4.2
2412
2413 Reads the first \a size code units of the \c wchar_t array to whose start
2414 \a string points, converting them to Unicode and returning the result as
2415 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2416 type's size is four bytes or UTF-16 if its size is two bytes.
2417
2418 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2419
2420 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2421 fromStdWString()
2422*/
2423
2424/*! \fn std::wstring QString::toStdWString() const
2425
2426 Returns a std::wstring object with the data contained in this
2427 QString. The std::wstring is encoded in UTF-16 on platforms where
2428 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2429 where wchar_t is 4 bytes wide (most Unix systems).
2430
2431 This method is mostly useful to pass a QString to a function
2432 that accepts a std::wstring object.
2433
2434 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2435 toStdU32String()
2436*/
2437
2438qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2439{
2440 qsizetype count = 0;
2441
2442 QStringIterator i(QStringView(uc, length));
2443 while (i.hasNext())
2444 out[count++] = i.next();
2445
2446 return count;
2447}
2448
2449/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2450 \since 4.2
2451
2452 Fills the \a array with the data contained in this QString object.
    The array is encoded in UTF-16 on platforms where
    wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
    where wchar_t is 4 bytes wide (most Unix systems).
2456
2457 \a array has to be allocated by the caller and contain enough space to
2458 hold the complete string (allocating the array with the same length as the
2459 string is always sufficient).
2460
2461 This function returns the actual length of the string in \a array.
2462
2463 \note This function does not append a null character to the array.
2464
2465 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2466 QStringView::toWCharArray()
2467*/
2468
2469/*! \fn QString::QString(const QString &other)
2470
2471 Constructs a copy of \a other.
2472
2473 This operation takes \l{constant time}, because QString is
2474 \l{implicitly shared}. This makes returning a QString from a
2475 function very fast. If a shared instance is modified, it will be
2476 copied (copy-on-write), and that takes \l{linear time}.
2477
2478 \sa operator=()
2479*/
2480
2481/*!
2482 Constructs a string initialized with the first \a size characters
2483 of the QChar array \a unicode.
2484
    If \a unicode is \nullptr, a null string is constructed.
2486
2487 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2488 array and its length is determined dynamically. The terminating
2489 null character is not considered part of the string.
2490
2491 QString makes a deep copy of the string data. The unicode data is copied as
2492 is and the Byte Order Mark is preserved if present.
2493
2494 \sa fromRawData()
2495*/
2496QString::QString(const QChar *unicode, qsizetype size)
2497{
2498 if (!unicode) {
2499 d.clear();
2500 } else {
2501 if (size < 0)
2502 size = QtPrivate::qustrlen(str: reinterpret_cast<const char16_t *>(unicode));
2503 if (!size) {
2504 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2505 } else {
2506 d = DataPointer(size, size);
2507 Q_CHECK_PTR(d.data());
2508 memcpy(dest: d.data(), src: unicode, n: size * sizeof(QChar));
2509 d.data()[size] = '\0';
2510 }
2511 }
2512}
2513
2514/*!
2515 Constructs a string of the given \a size with every character set
2516 to \a ch.
2517
2518 \sa fill()
2519*/
2520QString::QString(qsizetype size, QChar ch)
2521{
2522 if (size <= 0) {
2523 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2524 } else {
2525 d = DataPointer(size, size);
2526 Q_CHECK_PTR(d.data());
2527 d.data()[size] = '\0';
2528 char16_t *b = d.data();
2529 char16_t *e = d.data() + size;
2530 const char16_t value = ch.unicode();
2531 std::fill(first: b, last: e, value: value);
2532 }
2533}
2534
2535/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2536 \internal
2537
2538 Constructs a string of the given \a size without initializing the
2539 characters. This is only used in \c QStringBuilder::toString().
2540*/
2541QString::QString(qsizetype size, Qt::Initialization)
2542{
2543 if (size <= 0) {
2544 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2545 } else {
2546 d = DataPointer(size, size);
2547 Q_CHECK_PTR(d.data());
2548 d.data()[size] = '\0';
2549 }
2550}
2551
2552/*! \fn QString::QString(QLatin1StringView str)
2553
2554 Constructs a copy of the Latin-1 string viewed by \a str.
2555
2556 \sa fromLatin1()
2557*/
2558
2559/*!
2560 Constructs a string of size 1 containing the character \a ch.
2561*/
2562QString::QString(QChar ch)
2563{
2564 d = DataPointer(1, 1);
2565 Q_CHECK_PTR(d.data());
2566 d.data()[0] = ch.unicode();
2567 d.data()[1] = '\0';
2568}
2569
2570/*! \fn QString::QString(const QByteArray &ba)
2571
2572 Constructs a string initialized with the byte array \a ba. The
2573 given byte array is converted to Unicode using fromUtf8().
2574
2575 You can disable this constructor by defining
2576 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2577 can be useful if you want to ensure that all user-visible strings
2578 go through QObject::tr(), for example.
2579
2580 \note Any null ('\\0') bytes in the byte array will be included in this
2581 string, converted to Unicode null characters (U+0000). This behavior is
2582 different from Qt 5.x.
2583
2584 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2585*/
2586
2587/*! \fn QString::QString(const Null &)
2588 \internal
2589*/
2590
2591/*! \fn QString::QString(QStringPrivate)
2592 \internal
2593*/
2594
2595/*! \fn QString &QString::operator=(const QString::Null &)
2596 \internal
2597*/
2598
2599/*!
2600 \fn QString::~QString()
2601
2602 Destroys the string.
2603*/
2604
2605
2606/*! \fn void QString::swap(QString &other)
2607 \since 4.8
2608 \memberswap{string}
2609*/
2610
2611/*! \fn void QString::detach()
2612
2613 \internal
2614*/
2615
2616/*! \fn bool QString::isDetached() const
2617
2618 \internal
2619*/
2620
2621/*! \fn bool QString::isSharedWith(const QString &other) const
2622
2623 \internal
2624*/
2625
2626/*! \fn QString::operator std::u16string_view() const
2627 \since 6.7
2628
2629 Converts this QString object to a \c{std::u16string_view} object.
2630*/
2631
2632static bool needsReallocate(const QString &str, qsizetype newSize)
2633{
2634 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2635 return newSize > capacityAtEnd;
2636}
2637
2638/*!
2639 Sets the size of the string to \a size characters.
2640
2641 If \a size is greater than the current size, the string is
2642 extended to make it \a size characters long with the extra
2643 characters added to the end. The new characters are uninitialized.
2644
2645 If \a size is less than the current size, characters beyond position
2646 \a size are excluded from the string.
2647
2648 \note While resize() will grow the capacity if needed, it never shrinks
2649 capacity. To shed excess capacity, use squeeze().
2650
2651 Example:
2652
2653 \snippet qstring/main.cpp 45
2654
2655 If you want to append a certain number of identical characters to
2656 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2657
    If you want to expand the string so that it reaches a certain
    width and fill the new positions with a particular character, use
    the leftJustified() function:

    \snippet qstring/main.cpp 47

    If \a size is negative, it is equivalent to passing zero.
2665
2666 \sa truncate(), reserve(), squeeze()
2667*/
2668
2669void QString::resize(qsizetype size)
2670{
2671 if (size < 0)
2672 size = 0;
2673
2674 if (d->needsDetach() || needsReallocate(str: *this, newSize: size))
2675 reallocData(alloc: size, option: QArrayData::Grow);
2676 d.size = size;
2677 if (d->allocatedCapacity())
2678 d.data()[size] = u'\0';
2679}
2680
2681/*!
2682 \overload
2683 \since 5.7
2684
2685 Unlike \l {QString::}{resize(qsizetype)}, this overload
2686 initializes the new characters to \a fillChar:
2687
2688 \snippet qstring/main.cpp 46
2689*/
2690
2691void QString::resize(qsizetype newSize, QChar fillChar)
2692{
2693 const qsizetype oldSize = size();
2694 resize(size: newSize);
2695 const qsizetype difference = size() - oldSize;
2696 if (difference > 0)
2697 std::fill_n(first: d.data() + oldSize, n: difference, value: fillChar.unicode());
2698}
2699
2700
2701/*!
2702 \since 6.8
2703
2704 Sets the size of the string to \a size characters. If the size of
2705 the string grows, the new characters are uninitialized.
2706
2707 The behavior is identical to \c{resize(size)}.
2708
2709 \sa resize()
2710*/
2711
2712void QString::resizeForOverwrite(qsizetype size)
2713{
2714 resize(size);
2715}
2716
2717
2718/*! \fn qsizetype QString::capacity() const
2719
2720 Returns the maximum number of characters that can be stored in
2721 the string without forcing a reallocation.
2722
2723 The sole purpose of this function is to provide a means of fine
2724 tuning QString's memory usage. In general, you will rarely ever
2725 need to call this function. If you want to know how many
2726 characters are in the string, call size().
2727
    \note A statically allocated string will report a capacity of 0,
2729 even if it's not empty.
2730
2731 \note The free space position in the allocated memory block is undefined. In
2732 other words, one should not assume that the free memory is always located
2733 after the initialized elements.
2734
2735 \sa reserve(), squeeze()
2736*/
2737
2738/*!
2739 \fn void QString::reserve(qsizetype size)
2740
2741 Ensures the string has space for at least \a size characters.
2742
2743 If you know in advance how large a string will be, you can call this
2744 function to save repeated reallocation while building it.
2745 This can improve performance when building a string incrementally.
2746 A long sequence of operations that add to a string may trigger several
2747 reallocations, the last of which may leave you with significantly more
2748 space than you need. This is less efficient than doing a single
2749 allocation of the right size at the start.
2750
2751 If in doubt about how much space shall be needed, it is usually better to
2752 use an upper bound as \a size, or a high estimate of the most likely size,
2753 if a strict upper bound would be much bigger than this. If \a size is an
2754 underestimate, the string will grow as needed once the reserved size is
2755 exceeded, which may lead to a larger allocation than your best
2756 overestimate would have and will slow the operation that triggers it.
2757
2758 \warning reserve() reserves memory but does not change the size of the
2759 string. Accessing data beyond the end of the string is undefined behavior.
2760 If you need to access memory beyond the current end of the string,
2761 use resize().
2762
2763 This function is useful for code that needs to build up a long
2764 string and wants to avoid repeated reallocation. In this example,
2765 we want to add to the string until some condition is \c true, and
2766 we're fairly sure that size is large enough to make a call to
2767 reserve() worthwhile:
2768
2769 \snippet qstring/main.cpp 44
2770
2771 \sa squeeze(), capacity(), resize()
2772*/
2773
2774/*!
2775 \fn void QString::squeeze()
2776
2777 Releases any memory not required to store the character data.
2778
2779 The sole purpose of this function is to provide a means of fine
2780 tuning QString's memory usage. In general, you will rarely ever
2781 need to call this function.
2782
2783 \sa reserve(), capacity()
2784*/
2785
2786void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2787{
2788 if (!alloc) {
2789 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2790 return;
2791 }
2792
2793 // don't use reallocate path when reducing capacity and there's free space
2794 // at the beginning: might shift data pointer outside of allocated space
2795 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2796
2797 if (d->needsDetach() || cannotUseReallocate) {
2798 DataPointer dd(alloc, qMin(a: alloc, b: d.size), option);
2799 Q_CHECK_PTR(dd.data());
2800 if (dd.size > 0)
2801 ::memcpy(dest: dd.data(), src: d.data(), n: dd.size * sizeof(QChar));
2802 dd.data()[dd.size] = 0;
2803 d = dd;
2804 } else {
2805 d->reallocate(alloc, option);
2806 }
2807}
2808
2809void QString::reallocGrowData(qsizetype n)
2810{
2811 if (!n) // expected to always allocate
2812 n = 1;
2813
2814 if (d->needsDetach()) {
2815 DataPointer dd(DataPointer::allocateGrow(from: d, n, position: QArrayData::GrowsAtEnd));
2816 Q_CHECK_PTR(dd.data());
2817 dd->copyAppend(b: d.data(), e: d.data() + d.size);
2818 dd.data()[dd.size] = 0;
2819 d = dd;
2820 } else {
2821 d->reallocate(alloc: d.constAllocatedCapacity() + n, option: QArrayData::Grow);
2822 }
2823}
2824
2825/*! \fn void QString::clear()
2826
2827 Clears the contents of the string and makes it null.
2828
2829 \sa resize(), isNull()
2830*/
2831
2832/*! \fn QString &QString::operator=(const QString &other)
2833
2834 Assigns \a other to this string and returns a reference to this
2835 string.
2836*/
2837
2838QString &QString::operator=(const QString &other) noexcept
2839{
2840 d = other.d;
2841 return *this;
2842}
2843
2844/*!
2845 \fn QString &QString::operator=(QString &&other)
2846
2847 Move-assigns \a other to this QString instance.
2848
2849 \since 5.2
2850*/
2851
2852/*! \fn QString &QString::operator=(QLatin1StringView str)
2853
2854 \overload operator=()
2855
2856 Assigns the Latin-1 string viewed by \a str to this string.
2857*/
2858QString &QString::operator=(QLatin1StringView other)
2859{
2860 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2861 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2862 d.size = other.size();
2863 d.data()[other.size()] = 0;
2864 qt_from_latin1(dst: d.data(), str: other.latin1(), size: other.size());
2865 } else {
2866 *this = fromLatin1(str: other.latin1(), size: other.size());
2867 }
2868 return *this;
2869}
2870
2871/*! \fn QString &QString::operator=(const QByteArray &ba)
2872
2873 \overload operator=()
2874
2875 Assigns \a ba to this string. The byte array is converted to Unicode
2876 using the fromUtf8() function.
2877
2878 You can disable this operator by defining
2879 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2880 can be useful if you want to ensure that all user-visible strings
2881 go through QObject::tr(), for example.
2882*/
2883
2884/*! \fn QString &QString::operator=(const char *str)
2885
2886 \overload operator=()
2887
2888 Assigns \a str to this string. The const char pointer is converted
2889 to Unicode using the fromUtf8() function.
2890
2891 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2892 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2893 This can be useful if you want to ensure that all user-visible strings
2894 go through QObject::tr(), for example.
2895*/
2896
2897/*!
2898 \overload operator=()
2899
2900 Sets the string to contain the single character \a ch.
2901*/
2902QString &QString::operator=(QChar ch)
2903{
2904 return assign(n: 1, c: ch);
2905}
2906
2907/*!
2908 \fn QString& QString::insert(qsizetype position, const QString &str)
2909
2910 Inserts the string \a str at the given index \a position and
2911 returns a reference to this string.
2912
2913 Example:
2914
2915 \snippet qstring/main.cpp 26
2916
2917//! [string-grow-at-insertion]
2918 This string grows to accommodate the insertion. If \a position is beyond
2919 the end of the string, space characters are appended to the string to reach
2920 this \a position, followed by \a str.
2921//! [string-grow-at-insertion]
2922
2923 \sa append(), prepend(), replace(), remove()
2924*/
2925
2926/*!
2927 \fn QString& QString::insert(qsizetype position, QStringView str)
2928 \since 6.0
2929 \overload insert()
2930
2931 Inserts the string view \a str at the given index \a position and
2932 returns a reference to this string.
2933
2934 \include qstring.cpp string-grow-at-insertion
2935*/
2936
2937
2938/*!
2939 \fn QString& QString::insert(qsizetype position, const char *str)
2940 \since 5.5
2941 \overload insert()
2942
2943 Inserts the C string \a str at the given index \a position and
2944 returns a reference to this string.
2945
2946 \include qstring.cpp string-grow-at-insertion
2947
2948 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2949 defined.
2950*/
2951
2952/*!
2953 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2954 \since 5.5
2955 \overload insert()
2956
2957 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2958 it encodes at the given index \a position and returns a reference to
2959 this string.
2960
2961 \include qstring.cpp string-grow-at-insertion
2962
2963 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2964 defined.
2965*/
2966
2967/*! \internal
2968 T is a view or a container on/of QChar, char16_t, or char
2969*/
2970template <typename T>
2971static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2972{
2973 auto &str_d = str.data_ptr();
2974 qsizetype difference = 0;
2975 if (Q_UNLIKELY(i > str_d.size))
2976 difference = i - str_d.size;
2977 const qsizetype oldSize = str_d.size;
2978 const qsizetype insert_size = toInsert.size();
2979 const qsizetype newSize = str_d.size + difference + insert_size;
2980 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2981
2982 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2983 const auto cbegin = str.cbegin();
2984 const auto cend = str.cend();
2985 const auto insert_start = difference == 0 ? std::next(x: cbegin, n: i) : cend;
2986 QString other;
2987 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2988 // unittests pass
2989 other.data_ptr().detachAndGrow(where: side, n: newSize, data: nullptr, old: nullptr);
2990 other.append(v: QStringView(cbegin, insert_start));
2991 other.resize(newSize: i, fillChar: u' ');
2992 other.append(toInsert);
2993 other.append(v: QStringView(insert_start, cend));
2994 str.swap(other);
2995 return;
2996 }
2997
2998 str_d.detachAndGrow(where: side, n: difference + insert_size, data: nullptr, old: nullptr);
2999 Q_CHECK_PTR(str_d.data());
3000 str.resize(size: newSize);
3001
3002 auto begin = str_d.begin();
3003 auto old_end = std::next(x: begin, n: oldSize);
3004 std::fill_n(first: old_end, n: difference, value: u' ');
3005 auto insert_start = std::next(x: begin, n: i);
3006 if (difference == 0)
3007 std::move_backward(first: insert_start, last: old_end, result: str_d.end());
3008
3009 using Char = std::remove_cv_t<typename T::value_type>;
3010 if constexpr(std::is_same_v<Char, QChar>)
3011 std::copy_n(first: reinterpret_cast<const char16_t *>(toInsert.data()), n: insert_size, result: insert_start);
3012 else if constexpr (std::is_same_v<Char, char16_t>)
3013 std::copy_n(toInsert.data(), insert_size, insert_start);
3014 else if constexpr (std::is_same_v<Char, char>)
3015 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3016}
3017
3018/*!
3019 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3020 \overload insert()
3021
3022 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3023
3024 \include qstring.cpp string-grow-at-insertion
3025*/
3026QString &QString::insert(qsizetype i, QLatin1StringView str)
3027{
3028 const char *s = str.latin1();
3029 if (i < 0 || !s || !(*s))
3030 return *this;
3031
3032 insert_helper(str&: *this, i, toInsert: str);
3033 return *this;
3034}
3035
3036/*!
3037 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3038 \overload insert()
3039 \since 6.5
3040
3041 Inserts the UTF-8 string view \a str at the given index \a position.
3042
3043 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3044 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3045 (QLatin1StringView) and should thus be used sparingly.
3046
3047 \include qstring.cpp string-grow-at-insertion
3048*/
3049QString &QString::insert(qsizetype i, QUtf8StringView s)
3050{
3051 auto insert_size = s.size();
3052 if (i < 0 || insert_size <= 0)
3053 return *this;
3054
3055 qsizetype difference = 0;
3056 if (Q_UNLIKELY(i > d.size))
3057 difference = i - d.size;
3058
3059 const qsizetype newSize = d.size + difference + insert_size;
3060
3061 if (d.needsDetach() || needsReallocate(str: *this, newSize)) {
3062 const auto cbegin = this->cbegin();
3063 const auto insert_start = difference == 0 ? std::next(x: cbegin, n: i) : cend();
3064 QString other;
3065 other.reserve(asize: newSize);
3066 other.append(v: QStringView(cbegin, insert_start));
3067 if (difference > 0)
3068 other.resize(newSize: i, fillChar: u' ');
3069 other.append(s);
3070 other.append(v: QStringView(insert_start, cend()));
3071 swap(other);
3072 return *this;
3073 }
3074
3075 if (i >= d.size) {
3076 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: difference + insert_size, data: nullptr, old: nullptr);
3077 Q_CHECK_PTR(d.data());
3078
3079 if (difference > 0)
3080 resize(newSize: i, fillChar: u' ');
3081 append(s);
3082 } else {
3083 // Optimal insertion of Utf8 data is at the end, anywhere else could
3084 // potentially lead to moving characters twice if Utf8 data size
3085 // (variable-width) is less than the equivalent Utf16 data size
3086 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3087 char16_t *b = QUtf8::convertToUnicode(dst: buffer.data(), in: s);
3088 insert_helper(str&: *this, i, toInsert: QStringView(buffer.data(), b));
3089 }
3090
3091 return *this;
3092}
3093
3094/*!
3095 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3096 \overload insert()
3097
3098 Inserts the first \a size characters of the QChar array \a unicode
3099 at the given index \a position in the string.
3100
3101 This string grows to accommodate the insertion. If \a position is beyond
3102 the end of the string, space characters are appended to the string to reach
3103 this \a position, followed by \a size characters of the QChar array
3104 \a unicode.
3105*/
3106QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3107{
3108 if (i < 0 || size <= 0)
3109 return *this;
3110
3111 // In case when data points into "this"
3112 if (!d->needsDetach() && QtPrivate::q_points_into_range(p: unicode, c: *this)) {
3113 QVarLengthArray copy(unicode, unicode + size);
3114 insert(i, unicode: copy.data(), size);
3115 } else {
3116 insert_helper(str&: *this, i, toInsert: QStringView(unicode, size));
3117 }
3118
3119 return *this;
3120}
3121
3122/*!
3123 \fn QString& QString::insert(qsizetype position, QChar ch)
3124 \overload insert()
3125
3126 Inserts \a ch at the given index \a position in the string.
3127
3128 This string grows to accommodate the insertion. If \a position is beyond
3129 the end of the string, space characters are appended to the string to reach
3130 this \a position, followed by \a ch.
3131*/
3132
3133QString& QString::insert(qsizetype i, QChar ch)
3134{
3135 if (i < 0)
3136 i += d.size;
3137 return insert(i, unicode: &ch, size: 1);
3138}
3139
3140/*!
3141 Appends the string \a str onto the end of this string.
3142
3143 Example:
3144
3145 \snippet qstring/main.cpp 9
3146
3147 This is the same as using the insert() function:
3148
3149 \snippet qstring/main.cpp 10
3150
3151 The append() function is typically very fast (\l{constant time}),
3152 because QString preallocates extra space at the end of the string
3153 data so it can grow without reallocating the entire string each
3154 time.
3155
3156 \sa operator+=(), prepend(), insert()
3157*/
3158QString &QString::append(const QString &str)
3159{
3160 if (!str.isNull()) {
3161 if (isNull()) {
3162 if (Q_UNLIKELY(!str.d.isMutable()))
3163 assign(s: str); // fromRawData, so we do a deep copy
3164 else
3165 operator=(other: str);
3166 } else if (str.size()) {
3167 append(uc: str.constData(), len: str.size());
3168 }
3169 }
3170 return *this;
3171}
3172
3173/*!
3174 \fn QString &QString::append(QStringView v)
3175 \overload append()
3176 \since 6.0
3177
3178 Appends the given string view \a v to this string and returns the result.
3179*/
3180
3181/*!
3182 \overload append()
3183 \since 5.0
3184
3185 Appends \a len characters from the QChar array \a str to this string.
3186*/
3187QString &QString::append(const QChar *str, qsizetype len)
3188{
3189 if (str && len > 0) {
3190 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3191 // the following should be safe as QChar uses char16_t as underlying data
3192 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3193 d->growAppend(b: char16String, e: char16String + len);
3194 d.data()[d.size] = u'\0';
3195 }
3196 return *this;
3197}
3198
3199/*!
3200 \overload append()
3201
3202 Appends the Latin-1 string viewed by \a str to this string.
3203*/
3204QString &QString::append(QLatin1StringView str)
3205{
3206 append_helper(self&: *this, view: str);
3207 return *this;
3208}
3209
3210/*!
3211 \overload append()
3212 \since 6.5
3213
3214 Appends the UTF-8 string view \a str to this string.
3215*/
3216QString &QString::append(QUtf8StringView str)
3217{
3218 append_helper(self&: *this, view: str);
3219 return *this;
3220}
3221
3222/*! \fn QString &QString::append(const QByteArray &ba)
3223
3224 \overload append()
3225
3226 Appends the byte array \a ba to this string. The given byte array
3227 is converted to Unicode using the fromUtf8() function.
3228
3229 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3230 when you compile your applications. This can be useful if you want
3231 to ensure that all user-visible strings go through QObject::tr(),
3232 for example.
3233*/
3234
3235/*! \fn QString &QString::append(const char *str)
3236
3237 \overload append()
3238
3239 Appends the string \a str to this string. The given const char
3240 pointer is converted to Unicode using the fromUtf8() function.
3241
3242 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3243 when you compile your applications. This can be useful if you want
3244 to ensure that all user-visible strings go through QObject::tr(),
3245 for example.
3246*/
3247
3248/*!
3249 \overload append()
3250
3251 Appends the character \a ch to this string.
3252*/
3253QString &QString::append(QChar ch)
3254{
3255 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: 1, data: nullptr, old: nullptr);
3256 d->copyAppend(n: 1, t: ch.unicode());
3257 d.data()[d.size] = '\0';
3258 return *this;
3259}
3260
3261/*! \fn QString &QString::prepend(const QString &str)
3262
3263 Prepends the string \a str to the beginning of this string and
3264 returns a reference to this string.
3265
3266 This operation is typically very fast (\l{constant time}), because
3267 QString preallocates extra space at the beginning of the string data,
3268 so it can grow without reallocating the entire string each time.
3269
3270 Example:
3271
3272 \snippet qstring/main.cpp 36
3273
3274 \sa append(), insert()
3275*/
3276
3277/*! \fn QString &QString::prepend(QLatin1StringView str)
3278
3279 \overload prepend()
3280
3281 Prepends the Latin-1 string viewed by \a str to this string.
3282*/
3283
3284/*! \fn QString &QString::prepend(QUtf8StringView str)
3285 \since 6.5
3286 \overload prepend()
3287
3288 Prepends the UTF-8 string view \a str to this string.
3289*/
3290
3291/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3292 \since 5.5
3293 \overload prepend()
3294
3295 Prepends \a len characters from the QChar array \a str to this string and
3296 returns a reference to this string.
3297*/
3298
3299/*! \fn QString &QString::prepend(QStringView str)
3300 \since 6.0
3301 \overload prepend()
3302
3303 Prepends the string view \a str to the beginning of this string and
3304 returns a reference to this string.
3305*/
3306
3307/*! \fn QString &QString::prepend(const QByteArray &ba)
3308
3309 \overload prepend()
3310
3311 Prepends the byte array \a ba to this string. The byte array is
3312 converted to Unicode using the fromUtf8() function.
3313
3314 You can disable this function by defining
3315 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3316 can be useful if you want to ensure that all user-visible strings
3317 go through QObject::tr(), for example.
3318*/
3319
3320/*! \fn QString &QString::prepend(const char *str)
3321
3322 \overload prepend()
3323
3324 Prepends the string \a str to this string. The const char pointer
3325 is converted to Unicode using the fromUtf8() function.
3326
3327 You can disable this function by defining
3328 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3329 can be useful if you want to ensure that all user-visible strings
3330 go through QObject::tr(), for example.
3331*/
3332
3333/*! \fn QString &QString::prepend(QChar ch)
3334
3335 \overload prepend()
3336
3337 Prepends the character \a ch to this string.
3338*/
3339
3340/*!
3341 \fn QString &QString::assign(QAnyStringView v)
3342 \since 6.6
3343
3344 Replaces the contents of this string with a copy of \a v and returns a
3345 reference to this string.
3346
3347 The size of this string will be equal to the size of \a v, converted to
3348 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3349 this function only allocates memory if the estimated size exceeds the capacity
3350 of this string or this string is shared.
3351
3352 \sa QAnyStringView::toString()
3353*/
3354
3355/*!
3356 \fn QString &QString::assign(qsizetype n, QChar c)
3357 \since 6.6
3358
3359 Replaces the contents of this string with \a n copies of \a c and
3360 returns a reference to this string.
3361
3362 The size of this string will be equal to \a n, which has to be non-negative.
3363
3364 This function will only allocate memory if \a n exceeds the capacity of this
3365 string or this string is shared.
3366
3367 \sa fill()
3368*/
3369
3370/*!
3371 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3372 \since 6.6
3373
3374 Replaces the contents of this string with a copy of the elements in the
3375 iterator range [\a first, \a last) and returns a reference to this string.
3376
3377 The size of this string will be equal to the decoded length of the elements
3378 in the range [\a first, \a last), which need not be the same as the length of
3379 the range itself, because this function transparently recodes the input
3380 character set to UTF-16.
3381
3382 This function will only allocate memory if the number of elements in the
3383 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3384 resulting string, exceeds the capacity of this string, or if this string is
3385 shared.
3386
3387 \note This function overload only participates in overload resolution if
3388 \c InputIterator meets the requirements of a
3389 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3390 and the \c{value_type} of \c InputIterator is one of the following character types:
3391 \list
3392 \li QChar
3393 \li QLatin1Char
3394 \li \c {char}
3395 \li \c {unsigned char}
3396 \li \c {signed char}
3397 \li \c {char8_t}
3398 \li \c char16_t
3399 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3400 \li \c char32_t
3401 \endlist
3402
3403 \note The behavior is undefined if either argument is an iterator into *this or
3404 [\a first, \a last) is not a valid range.
3405*/
3406
3407QString &QString::assign(QAnyStringView s)
3408{
3409 if (s.size() <= capacity() && isDetached()) {
3410 const auto offset = d.freeSpaceAtBegin();
3411 if (offset)
3412 d.setBegin(d.begin() - offset);
3413 resize(size: 0);
3414 s.visit(v: [this](auto input) {
3415 this->append(input);
3416 });
3417 } else {
3418 *this = s.toString();
3419 }
3420 return *this;
3421}
3422
3423#ifndef QT_BOOTSTRAPPED
3424QString &QString::assign_helper(const char32_t *data, qsizetype len)
3425{
3426 // worst case: each char32_t requires a surrogate pair, so
3427 const auto requiredCapacity = len * 2;
3428 if (requiredCapacity <= capacity() && isDetached()) {
3429 const auto offset = d.freeSpaceAtBegin();
3430 if (offset)
3431 d.setBegin(d.begin() - offset);
3432 auto begin = reinterpret_cast<QChar *>(d.begin());
3433 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3434 QStringConverter::State state;
3435 const auto end = QUtf32::convertToUnicode(out: begin, ba, state: &state, endian: DetectEndianness);
3436 d.size = end - begin;
3437 d.data()[d.size] = u'\0';
3438 } else {
3439 *this = QString::fromUcs4(data, size: len);
3440 }
3441 return *this;
3442}
3443#endif
3444
3445/*!
3446 \fn QString &QString::remove(qsizetype position, qsizetype n)
3447
3448 Removes \a n characters from the string, starting at the given \a
3449 position index, and returns a reference to the string.
3450
3451 If the specified \a position index is within the string, but \a
3452 position + \a n is beyond the end of the string, the string is
3453 truncated at the specified \a position.
3454
3455 If \a n is <= 0 nothing is changed.
3456
3457 \snippet qstring/main.cpp 37
3458
3459//! [shrinking-erase]
3460 Element removal will preserve the string's capacity and not reduce the
3461 amount of allocated memory. To shed extra capacity and free as much memory
3462 as possible, call squeeze() after the last change to the string's size.
3463//! [shrinking-erase]
3464
3465 \sa insert(), replace()
3466*/
3467QString &QString::remove(qsizetype pos, qsizetype len)
3468{
3469 if (pos < 0) // count from end of string
3470 pos += size();
3471
3472 if (size_t(pos) >= size_t(size()) || len <= 0)
3473 return *this;
3474
3475 len = std::min(a: len, b: size() - pos);
3476
3477 if (!d->isShared()) {
3478 d->erase(b: d.begin() + pos, n: len);
3479 d.data()[d.size] = u'\0';
3480 } else {
3481 // TODO: either reserve "size()", which is bigger than needed, or
3482 // modify the shrinking-erase docs of this method (since the size
3483 // of "copy" won't have any extra capacity any more)
3484 const qsizetype sz = size() - len;
3485 QString copy{sz, Qt::Uninitialized};
3486 auto begin = d.begin();
3487 auto toRemove_start = d.begin() + pos;
3488 copy.d->copyRanges(ranges: {{.begin: begin, .end: toRemove_start},
3489 {.begin: toRemove_start + len, .end: d.end()}});
3490 swap(other&: copy);
3491 }
3492 return *this;
3493}
3494
3495template<typename T>
3496static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3497{
3498 const auto needleSize = needle.size();
3499 if (!needleSize)
3500 return;
3501
3502 // avoid detach if nothing to do:
3503 qsizetype i = s.indexOf(needle, 0, cs);
3504 if (i < 0)
3505 return;
3506
3507 QString::DataPointer &dptr = s.data_ptr();
3508 auto begin = dptr.begin();
3509 auto end = dptr.end();
3510
3511 auto copyFunc = [&](auto &dst) {
3512 auto src = begin + i + needleSize;
3513 while (src < end) {
3514 i = s.indexOf(needle, std::distance(begin, src), cs);
3515 auto hit = i == -1 ? end : begin + i;
3516 dst = std::copy(src, hit, dst);
3517 src = hit + needleSize;
3518 }
3519 return dst;
3520 };
3521
3522 if (!dptr->needsDetach()) {
3523 auto dst = begin + i;
3524 dst = copyFunc(dst);
3525 s.truncate(pos: std::distance(first: begin, last: dst));
3526 } else {
3527 QString copy{s.size(), Qt::Uninitialized};
3528 auto copy_begin = copy.begin();
3529 auto dst = std::copy(first: begin, last: begin + i, result: copy_begin); // Chunk before the first hit
3530 dst = copyFunc(dst);
3531 copy.resize(size: std::distance(first: copy_begin, last: dst));
3532 s.swap(other&: copy);
3533 }
3534}
3535
3536/*!
3537 Removes every occurrence of the given \a str string in this
3538 string, and returns a reference to this string.
3539
3540 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3541
3542 This is the same as \c replace(str, "", cs).
3543
3544 \include qstring.cpp shrinking-erase
3545
3546 \sa replace()
3547*/
3548QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3549{
3550 const auto s = str.d.data();
3551 if (QtPrivate::q_points_into_range(p: s, c: d))
3552 removeStringImpl(s&: *this, needle: QStringView{QVarLengthArray(s, s + str.size())}, cs);
3553 else
3554 removeStringImpl(s&: *this, needle: qToStringViewIgnoringNull(s: str), cs);
3555 return *this;
3556}
3557
3558/*!
3559 \since 5.11
3560 \overload
3561
3562 Removes every occurrence of the given Latin-1 string viewed by \a str
3563 from this string, and returns a reference to this string.
3564
3565 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3566
3567 This is the same as \c replace(str, "", cs).
3568
3569 \include qstring.cpp shrinking-erase
3570
3571 \sa replace()
3572*/
3573QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3574{
3575 removeStringImpl(s&: *this, needle: str, cs);
3576 return *this;
3577}
3578
3579/*!
3580 \fn QString &QString::removeAt(qsizetype pos)
3581
3582 \since 6.5
3583
3584 Removes the character at index \a pos. If \a pos is out of bounds
3585 (i.e. \a pos >= size()), this function does nothing.
3586
3587 \sa remove()
3588*/
3589
3590/*!
3591 \fn QString &QString::removeFirst()
3592
3593 \since 6.5
3594
3595 Removes the first character in this string. If the string is empty,
3596 this function does nothing.
3597
3598 \sa remove()
3599*/
3600
3601/*!
3602 \fn QString &QString::removeLast()
3603
3604 \since 6.5
3605
3606 Removes the last character in this string. If the string is empty,
3607 this function does nothing.
3608
3609 \sa remove()
3610*/
3611
3612/*!
3613 Removes every occurrence of the character \a ch in this string, and
3614 returns a reference to this string.
3615
3616 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3617
3618 Example:
3619
3620 \snippet qstring/main.cpp 38
3621
3622 This is the same as \c replace(ch, "", cs).
3623
3624 \include qstring.cpp shrinking-erase
3625
3626 \sa replace()
3627*/
3628QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3629{
3630 const qsizetype idx = indexOf(c: ch, from: 0, cs);
3631 if (idx == -1)
3632 return *this;
3633
3634 const bool isCase = cs == Qt::CaseSensitive;
3635 ch = isCase ? ch : ch.toCaseFolded();
3636 auto match = [ch, isCase](QChar x) {
3637 return ch == (isCase ? x : x.toCaseFolded());
3638 };
3639
3640
3641 auto begin = d.begin();
3642 auto first_match = begin + idx;
3643 auto end = d.end();
3644 if (!d->isShared()) {
3645 auto it = std::remove_if(first: first_match, last: end, pred: match);
3646 d->erase(b: it, n: std::distance(first: it, last: end));
3647 d.data()[d.size] = u'\0';
3648 } else {
3649 // Instead of detaching, create a new string and copy all characters except for
3650 // the ones we're removing
3651 // TODO: size() is more than the needed since "copy" would be shorter
3652 QString copy{size(), Qt::Uninitialized};
3653 auto dst = copy.d.begin();
3654 auto it = std::copy(first: begin, last: first_match, result: dst); // Chunk before idx
3655 it = std::remove_copy_if(first: first_match + 1, last: end, result: it, pred: match);
3656 copy.d.size = std::distance(first: dst, last: it);
3657 copy.d.data()[copy.d.size] = u'\0';
3658 *this = std::move(copy);
3659 }
3660 return *this;
3661}
3662
3663/*!
3664 \fn QString &QString::remove(const QRegularExpression &re)
3665 \since 5.0
3666
3667 Removes every occurrence of the regular expression \a re in the
3668 string, and returns a reference to the string. For example:
3669
3670 \snippet qstring/main.cpp 96
3671
3672 \include qstring.cpp shrinking-erase
3673
3674 \sa indexOf(), lastIndexOf(), replace()
3675*/
3676
3677/*!
3678 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3679 \since 6.1
3680
3681 Removes all elements for which the predicate \a pred returns true
3682 from the string. Returns a reference to the string.
3683
3684 \sa remove()
3685*/
3686
3687
3688/*! \internal
3689 Instead of detaching, or reallocating if "before" is shorter than "after"
3690 and there isn't enough capacity, create a new string, copy characters to it
3691 as needed, then swap it with "str".
3692*/
3693static void replace_with_copy(QString &str, QSpan<size_t> indices, qsizetype blen,
3694 QStringView after)
3695{
3696 const qsizetype alen = after.size();
3697 const char16_t *after_b = after.utf16();
3698
3699 const QString::DataPointer &str_d = str.data_ptr();
3700 auto src_start = str_d.begin();
3701 const qsizetype newSize = str_d.size + indices.size() * (alen - blen);
3702 QString copy{ newSize, Qt::Uninitialized };
3703 QString::DataPointer &copy_d = copy.data_ptr();
3704 auto dst = copy_d.begin();
3705 for (size_t index : indices) {
3706 auto hit = str_d.begin() + index;
3707 dst = std::copy(first: src_start, last: hit, result: dst);
3708 dst = std::copy_n(first: after_b, n: alen, result: dst);
3709 src_start = hit + blen;
3710 }
3711 dst = std::copy(first: src_start, last: str_d.end(), result: dst);
3712 str.swap(other&: copy);
3713}
3714
3715// No detaching or reallocation is needed
3716static void replace_in_place(QString &str, QSpan<size_t> indices,
3717 qsizetype blen, QStringView after)
3718{
3719 const qsizetype alen = after.size();
3720 const char16_t *after_b = after.utf16();
3721 const char16_t *after_e = after.utf16() + after.size();
3722
3723 if (blen == alen) { // Replace in place
3724 for (size_t index : indices)
3725 std::copy_n(first: after_b, n: alen, result: str.data_ptr().begin() + index);
3726 } else if (blen > alen) { // Replace from front
3727 char16_t *begin = str.data_ptr().begin();
3728 char16_t *hit = begin + indices.front();
3729 char16_t *to = hit;
3730 to = std::copy_n(first: after_b, n: alen, result: to);
3731 char16_t *movestart = hit + blen;
3732 for (size_t index : indices.sliced(pos: 1)) {
3733 hit = begin + index;
3734 to = std::move(first: movestart, last: hit, result: to);
3735 to = std::copy_n(first: after_b, n: alen, result: to);
3736 movestart = hit + blen;
3737 }
3738 to = std::move(first: movestart, last: str.data_ptr().end(), result: to);
3739 str.resize(size: std::distance(first: begin, last: to));
3740 } else { // blen < alen, Replace from back
3741 const qsizetype oldSize = str.data_ptr().size;
3742 const qsizetype adjust = indices.size() * (alen - blen);
3743 const qsizetype newSize = oldSize + adjust;
3744
3745 str.resize(size: newSize);
3746 char16_t *begin = str.data_ptr().begin();
3747 char16_t *moveend = begin + oldSize;
3748 char16_t *to = str.data_ptr().end();
3749
3750 for (auto it = indices.rbegin(), end = indices.rend(); it != end; ++it) {
3751 char16_t *hit = begin + *it;
3752 char16_t *movestart = hit + blen;
3753 to = std::move_backward(first: movestart, last: moveend, result: to);
3754 to = std::copy_backward(first: after_b, last: after_e, result: to);
3755 moveend = hit;
3756 }
3757 }
3758}
3759
3760static void replace_helper(QString &str, QSpan<size_t> indices, qsizetype blen, QStringView after)
3761{
3762 const qsizetype oldSize = str.data_ptr().size;
3763 const qsizetype adjust = indices.size() * (after.size() - blen);
3764 const qsizetype newSize = oldSize + adjust;
3765 if (str.data_ptr().needsDetach()) {
3766 replace_with_copy(str, indices, blen, after);
3767 return;
3768 }
3769
3770 str.reserve(asize: newSize);
3771
3772 if (QtPrivate::q_points_into_range(p: after.begin(), c: str))
3773 // Copy after if it lies inside our own d.b area (which we could
3774 // possibly invalidate via a realloc or modify by replacement)
3775 replace_in_place(str, indices, blen, after: QVarLengthArray(after.begin(), after.end()));
3776 else
3777 replace_in_place(str, indices, blen, after);
3778}
3779
3780/*!
3781 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3782
3783 Replaces \a n characters beginning at index \a position with
3784 the string \a after and returns a reference to this string.
3785
3786 \note If the specified \a position index is within the string,
    but \a position + \a n goes outside the string's range,
3788 then \a n will be adjusted to stop at the end of the string.
3789
3790 Example:
3791
3792 \snippet qstring/main.cpp 40
3793
3794 \sa insert(), remove()
3795*/
3796QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3797{
3798 return replace(i: pos, len, s: after.constData(), slen: after.size());
3799}
3800
3801/*!
3802 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3803 \overload replace()
3804 Replaces \a n characters beginning at index \a position with the
3805 first \a alen characters of the QChar array \a after and returns a
3806 reference to this string.
3807*/
3808QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3809{
3810 if (size_t(pos) > size_t(this->size()))
3811 return *this;
3812 if (len > this->size() - pos)
3813 len = this->size() - pos;
3814
3815 size_t index = pos;
3816 replace_helper(str&: *this, indices: QSpan(&index, 1), blen: len, after: QStringView{after, alen});
3817 return *this;
3818}
3819
3820/*!
3821 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3822 \overload replace()
3823
3824 Replaces \a n characters beginning at index \a position with the
3825 character \a after and returns a reference to this string.
3826*/
3827QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3828{
3829 return replace(pos, len, after: &after, alen: 1);
3830}
3831
3832/*!
3833 \overload replace()
3834 Replaces every occurrence of the string \a before with the string \a
3835 after and returns a reference to this string.
3836
3837 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3838
3839 Example:
3840
3841 \snippet qstring/main.cpp 41
3842
3843 \note The replacement text is not rescanned after it is inserted.
3844
3845 Example:
3846
3847 \snippet qstring/main.cpp 86
3848
3849//! [empty-before-arg-in-replace]
3850 \note If you use an empty \a before argument, the \a after argument will be
3851 inserted \e {before and after} each character of the string.
3852//! [empty-before-arg-in-replace]
3853
3854*/
3855QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3856{
3857 return replace(before: before.constData(), blen: before.size(), after: after.constData(), alen: after.size(), cs);
3858}
3859
3860/*!
3861 \since 4.5
3862 \overload replace()
3863
3864 Replaces each occurrence in this string of the first \a blen
3865 characters of \a before with the first \a alen characters of \a
3866 after and returns a reference to this string.
3867
3868 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3869
3870 \note If \a before points to an \e empty string (that is, \a blen == 0),
3871 the string pointed to by \a after will be inserted \e {before and after}
3872 each character in this string.
3873*/
3874QString &QString::replace(const QChar *before, qsizetype blen,
3875 const QChar *after, qsizetype alen,
3876 Qt::CaseSensitivity cs)
3877{
3878 if (d.size == 0) {
3879 if (blen)
3880 return *this;
3881 } else {
3882 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3883 return *this;
3884 }
3885 if (alen == 0 && blen == 0)
3886 return *this;
3887 if (alen == 1 && blen == 1)
3888 return replace(before: *before, after: *after, cs);
3889
3890 QStringMatcher matcher(before, blen, cs);
3891
3892 qsizetype index = 0;
3893
3894 QVarLengthArray<size_t> indices;
3895 while ((index = matcher.indexIn(str: *this, from: index)) != -1) {
3896 indices.push_back(t: index);
3897 if (blen) // Step over before:
3898 index += blen;
3899 else // Only count one instance of empty between any two characters:
3900 index++;
3901 }
3902 if (indices.isEmpty())
3903 return *this;
3904
3905 replace_helper(str&: *this, indices, blen, after: QStringView{after, alen});
3906 return *this;
3907}
3908
3909/*!
3910 \overload replace()
3911 Replaces every occurrence of the character \a ch in the string with
3912 \a after and returns a reference to this string.
3913
3914 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3915*/
3916QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3917{
3918 if (after.size() == 0)
3919 return remove(ch, cs);
3920
3921 if (after.size() == 1)
3922 return replace(before: ch, after: after.front(), cs);
3923
3924 if (size() == 0)
3925 return *this;
3926
3927 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3928
3929 QVarLengthArray<size_t> indices;
3930 if (cs == Qt::CaseSensitive) {
3931 const char16_t *begin = d.begin();
3932 const char16_t *end = d.end();
3933 QStringView view(begin, end);
3934 const char16_t *hit = nullptr;
3935 while ((hit = QtPrivate::qustrchr(str: view, c: cc)) != end) {
3936 indices.push_back(t: std::distance(first: begin, last: hit));
3937 view = QStringView(std::next(x: hit), end);
3938 }
3939 } else {
3940 for (qsizetype i = 0; i < d.size; ++i)
3941 if (QChar::toCaseFolded(ucs4: d.data()[i]) == cc)
3942 indices.push_back(t: i);
3943 }
3944 if (indices.isEmpty())
3945 return *this;
3946
3947 replace_helper(str&: *this, indices, blen: 1, after);
3948 return *this;
3949}
3950
3951/*!
3952 \overload replace()
3953 Replaces every occurrence of the character \a before with the
3954 character \a after and returns a reference to this string.
3955
3956 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3957*/
3958QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3959{
3960 const qsizetype idx = indexOf(c: before, from: 0, cs);
3961 if (idx == -1)
3962 return *this;
3963
3964 const char16_t achar = after.unicode();
3965 char16_t bchar = before.unicode();
3966
3967 auto matchesCIS = [](char16_t beforeChar) {
3968 return [beforeChar](char16_t ch) { return foldAndCompare(a: ch, b: beforeChar); };
3969 };
3970
3971 auto hit = d.begin() + idx;
3972 if (!d.needsDetach()) {
3973 *hit++ = achar;
3974 if (cs == Qt::CaseSensitive) {
3975 std::replace(first: hit, last: d.end(), old_value: bchar, new_value: achar);
3976 } else {
3977 bchar = foldCase(ch: bchar);
3978 std::replace_if(first: hit, last: d.end(), pred: matchesCIS(bchar), new_value: achar);
3979 }
3980 } else {
3981 QString other{ d.size, Qt::Uninitialized };
3982 auto dest = std::copy(first: d.begin(), last: hit, result: other.d.begin());
3983 *dest++ = achar;
3984 ++hit;
3985 if (cs == Qt::CaseSensitive) {
3986 std::replace_copy(first: hit, last: d.end(), result: dest, old_value: bchar, new_value: achar);
3987 } else {
3988 bchar = foldCase(ch: bchar);
3989 std::replace_copy_if(first: hit, last: d.end(), result: dest, pred: matchesCIS(bchar), new_value: achar);
3990 }
3991
3992 swap(other);
3993 }
3994 return *this;
3995}
3996
3997/*!
3998 \since 4.5
3999 \overload replace()
4000
4001 Replaces every occurrence in this string of the Latin-1 string viewed
4002 by \a before with the Latin-1 string viewed by \a after, and returns a
4003 reference to this string.
4004
4005 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4006
4007 \note The text is not rescanned after a replacement.
4008
4009 \include qstring.cpp empty-before-arg-in-replace
4010*/
4011QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
4012{
4013 const qsizetype alen = after.size();
4014 const qsizetype blen = before.size();
4015 if (blen == 1 && alen == 1)
4016 return replace(before: before.front(), after: after.front(), cs);
4017
4018 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4019 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(str: before);
4020 return replace(before: (const QChar *)b.data(), blen, after: (const QChar *)a.data(), alen, cs);
4021}
4022
4023/*!
4024 \since 4.5
4025 \overload replace()
4026
4027 Replaces every occurrence in this string of the Latin-1 string viewed
4028 by \a before with the string \a after, and returns a reference to this
4029 string.
4030
4031 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4032
4033 \note The text is not rescanned after a replacement.
4034
4035 \include qstring.cpp empty-before-arg-in-replace
4036*/
4037QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
4038{
4039 const qsizetype blen = before.size();
4040 if (blen == 1 && after.size() == 1)
4041 return replace(before: before.front(), after: after.front(), cs);
4042
4043 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(str: before);
4044 return replace(before: (const QChar *)b.data(), blen, after: after.constData(), alen: after.d.size, cs);
4045}
4046
4047/*!
4048 \since 4.5
4049 \overload replace()
4050
4051 Replaces every occurrence of the string \a before with the string \a
4052 after and returns a reference to this string.
4053
4054 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4055
4056 \note The text is not rescanned after a replacement.
4057
4058 \include qstring.cpp empty-before-arg-in-replace
4059*/
4060QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
4061{
4062 const qsizetype alen = after.size();
4063 if (before.size() == 1 && alen == 1)
4064 return replace(before: before.front(), after: after.front(), cs);
4065
4066 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4067 return replace(before: before.constData(), blen: before.d.size, after: (const QChar *)a.data(), alen, cs);
4068}
4069
4070/*!
4071 \since 4.5
4072 \overload replace()
4073
4074 Replaces every occurrence of the character \a c with the string \a
4075 after and returns a reference to this string.
4076
4077 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4078
4079 \note The text is not rescanned after a replacement.
4080*/
4081QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4082{
4083 const qsizetype alen = after.size();
4084 if (alen == 1)
4085 return replace(before: c, after: after.front(), cs);
4086
4087 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4088 return replace(before: &c, blen: 1, after: (const QChar *)a.data(), alen, cs);
4089}
4090
4091/*!
4092 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4093 \overload operator==()
4094
4095 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4096 returns \c false.
4097
4098 \include qstring.cpp compare-isNull-vs-isEmpty
4099
4100 \sa {Comparing Strings}
4101*/
4102
4103/*!
4104 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4105
4106 \overload operator==()
4107
4108 Returns \c true if \a lhs is equal to \a rhs; otherwise
4109 returns \c false.
4110*/
4111
4112/*!
4113 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4114
4115 \overload operator==()
4116
4117 Returns \c true if \a lhs is equal to \a rhs; otherwise
4118 returns \c false.
4119*/
4120
4121/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4122
4123 \overload operator==()
4124
4125 The \a rhs byte array is converted to a QUtf8StringView.
4126
4127 You can disable this operator by defining
4128 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4129 can be useful if you want to ensure that all user-visible strings
4130 go through QObject::tr(), for example.
4131
4132 Returns \c true if string \a lhs is lexically equal to \a rhs.
4133 Otherwise returns \c false.
4134*/
4135
4136/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4137
4138 \overload operator==()
4139
4140 The \a rhs const char pointer is converted to a QUtf8StringView.
4141
4142 You can disable this operator by defining
4143 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4144 can be useful if you want to ensure that all user-visible strings
4145 go through QObject::tr(), for example.
4146*/
4147
4148/*!
4149 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4150
4151 \overload operator<()
4152
4153 Returns \c true if string \a lhs is lexically less than string
4154 \a rhs; otherwise returns \c false.
4155
4156 \sa {Comparing Strings}
4157*/
4158
4159/*!
4160 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4161
4162 \overload operator<()
4163
4164 Returns \c true if \a lhs is lexically less than \a rhs;
4165 otherwise returns \c false.
4166*/
4167
4168/*!
4169 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4170
4171 \overload operator<()
4172
4173 Returns \c true if \a lhs is lexically less than \a rhs;
4174 otherwise returns \c false.
4175*/
4176
4177/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4178
4179 \overload operator<()
4180
4181 The \a rhs byte array is converted to a QUtf8StringView.
4182 If any NUL characters ('\\0') are embedded in the byte array, they will be
4183 included in the transformation.
4184
    You can disable this operator by defining
4186 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4187 can be useful if you want to ensure that all user-visible strings
4188 go through QObject::tr(), for example.
4189*/
4190
4191/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4192
4193 Returns \c true if string \a lhs is lexically less than string \a rhs.
4194 Otherwise returns \c false.
4195
4196 \overload operator<()
4197
4198 The \a rhs const char pointer is converted to a QUtf8StringView.
4199
4200 You can disable this operator by defining
4201 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4202 can be useful if you want to ensure that all user-visible strings
4203 go through QObject::tr(), for example.
4204*/
4205
4206/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4207
4208 Returns \c true if string \a lhs is lexically less than or equal to
4209 string \a rhs; otherwise returns \c false.
4210
4211 \sa {Comparing Strings}
4212*/
4213
4214/*!
4215 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4216
4217 \overload operator<=()
4218
4219 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4220 otherwise returns \c false.
4221*/
4222
4223/*!
4224 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4225
4226 \overload operator<=()
4227
4228 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4229 otherwise returns \c false.
4230*/
4231
4232/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4233
4234 \overload operator<=()
4235
4236 The \a rhs byte array is converted to a QUtf8StringView.
4237 If any NUL characters ('\\0') are embedded in the byte array, they will be
4238 included in the transformation.
4239
4240 You can disable this operator by defining
4241 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4242 can be useful if you want to ensure that all user-visible strings
4243 go through QObject::tr(), for example.
4244*/
4245
4246/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4247
4248 \overload operator<=()
4249
4250 The \a rhs const char pointer is converted to a QUtf8StringView.
4251
4252 You can disable this operator by defining
4253 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4254 can be useful if you want to ensure that all user-visible strings
4255 go through QObject::tr(), for example.
4256*/
4257
4258/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4259
4260 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4261 otherwise returns \c false.
4262
4263 \sa {Comparing Strings}
4264*/
4265
4266/*!
4267 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4268
4269 \overload operator>()
4270
4271 Returns \c true if \a lhs is lexically greater than \a rhs;
4272 otherwise returns \c false.
4273*/
4274
4275/*!
4276 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4277
4278 \overload operator>()
4279
4280 Returns \c true if \a lhs is lexically greater than \a rhs;
4281 otherwise returns \c false.
4282*/
4283
4284/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4285
4286 \overload operator>()
4287
4288 The \a rhs byte array is converted to a QUtf8StringView.
4289 If any NUL characters ('\\0') are embedded in the byte array, they will be
4290 included in the transformation.
4291
4292 You can disable this operator by defining
4293 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4294 can be useful if you want to ensure that all user-visible strings
4295 go through QObject::tr(), for example.
4296*/
4297
4298/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4299
4300 \overload operator>()
4301
4302 The \a rhs const char pointer is converted to a QUtf8StringView.
4303
4304 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4305 when you compile your applications. This can be useful if you want
4306 to ensure that all user-visible strings go through QObject::tr(),
4307 for example.
4308*/
4309
4310/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4311
4312 Returns \c true if string \a lhs is lexically greater than or equal to
4313 string \a rhs; otherwise returns \c false.
4314
4315 \sa {Comparing Strings}
4316*/
4317
4318/*!
4319 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4320
4321 \overload operator>=()
4322
4323 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4324 otherwise returns \c false.
4325*/
4326
4327/*!
4328 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4329
4330 \overload operator>=()
4331
4332 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4333 otherwise returns \c false.
4334*/
4335
4336/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4337
4338 \overload operator>=()
4339
4340 The \a rhs byte array is converted to a QUtf8StringView.
4341 If any NUL characters ('\\0') are embedded in the byte array, they will be
4342 included in the transformation.
4343
4344 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4345 when you compile your applications. This can be useful if you want
4346 to ensure that all user-visible strings go through QObject::tr(),
4347 for example.
4348*/
4349
4350/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4351
4352 \overload operator>=()
4353
4354 The \a rhs const char pointer is converted to a QUtf8StringView.
4355
4356 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4357 when you compile your applications. This can be useful if you want
4358 to ensure that all user-visible strings go through QObject::tr(),
4359 for example.
4360*/
4361
4362/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4363
4364 Returns \c true if string \a lhs is not equal to string \a rhs;
4365 otherwise returns \c false.
4366
4367 \sa {Comparing Strings}
4368*/
4369
4370/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4371
4372 Returns \c true if string \a lhs is not equal to string \a rhs.
4373 Otherwise returns \c false.
4374
4375 \overload operator!=()
4376*/
4377
4378/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4379
4380 \overload operator!=()
4381
4382 The \a rhs byte array is converted to a QUtf8StringView.
4383 If any NUL characters ('\\0') are embedded in the byte array, they will be
4384 included in the transformation.
4385
4386 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4387 when you compile your applications. This can be useful if you want
4388 to ensure that all user-visible strings go through QObject::tr(),
4389 for example.
4390*/
4391
4392/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4393
4394 \overload operator!=()
4395
4396 The \a rhs const char pointer is converted to a QUtf8StringView.
4397
4398 You can disable this operator by defining
4399 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4400 can be useful if you want to ensure that all user-visible strings
4401 go through QObject::tr(), for example.
4402*/
4403
4404/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4405
4406 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4407 \a rhs; otherwise returns \c false.
4408
4409 The comparison is case sensitive.
4410
4411 You can disable this operator by defining \c
4412 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4413 then need to call QString::fromUtf8(), QString::fromLatin1(),
4414 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4415 array to a QString before doing the comparison.
4416*/
4417
4418/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4419
4420 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4421 \a rhs; otherwise returns \c false.
4422
4423 The comparison is case sensitive.
4424
4425 You can disable this operator by defining \c
4426 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4427 then need to call QString::fromUtf8(), QString::fromLatin1(),
4428 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4429 array to a QString before doing the comparison.
4430*/
4431
4432/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4433
4434 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4435 of \a rhs; otherwise returns \c false.
4436
4437 The comparison is case sensitive.
4438
4439 You can disable this operator by defining \c
4440 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4441 then need to call QString::fromUtf8(), QString::fromLatin1(),
4442 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4443 array to a QString before doing the comparison.
4444*/
4445
4446/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4447
4448 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4449 encoding of \a rhs; otherwise returns \c false.
4450
4451 The comparison is case sensitive.
4452
4453 You can disable this operator by defining \c
4454 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4455 then need to call QString::fromUtf8(), QString::fromLatin1(),
4456 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4457 array to a QString before doing the comparison.
4458*/
4459
4460/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4461
4462 Returns \c true if byte array \a lhs is lexically less than or equal to the
4463 UTF-8 encoding of \a rhs; otherwise returns \c false.
4464
4465 The comparison is case sensitive.
4466
4467 You can disable this operator by defining \c
4468 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4469 then need to call QString::fromUtf8(), QString::fromLatin1(),
4470 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4471 array to a QString before doing the comparison.
4472*/
4473
4474/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4475
    Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4477 encoding of \a rhs; otherwise returns \c false.
4478
4479 The comparison is case sensitive.
4480
4481 You can disable this operator by defining \c
4482 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4483 then need to call QString::fromUtf8(), QString::fromLatin1(),
4484 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4485 array to a QString before doing the comparison.
4486*/
4487
4488/*!
4489 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4490
4491 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4492
4493 Example:
4494
4495 \snippet qstring/main.cpp 24
4496
4497 \include qstring.qdocinc negative-index-start-search-from-end
4498
4499 \sa lastIndexOf(), contains(), count()
4500*/
4501qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4502{
4503 return QtPrivate::findString(haystack: QStringView(unicode(), size()), from, needle: QStringView(str.unicode(), str.size()), cs);
4504}
4505
4506/*!
4507 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4508 \since 5.14
4509 \overload indexOf()
4510
4511 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4512
4513 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4514
4515 \include qstring.qdocinc negative-index-start-search-from-end
4516
4517 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4518*/
4519
4520/*!
4521 \since 4.5
4522
4523 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4524
4525 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4526
4527 Example:
4528
4529 \snippet qstring/main.cpp 24
4530
4531 \include qstring.qdocinc negative-index-start-search-from-end
4532
4533 \sa lastIndexOf(), contains(), count()
4534*/
4535
4536qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4537{
4538 return QtPrivate::findString(haystack: QStringView(unicode(), size()), from, needle: str, cs);
4539}
4540
4541/*!
4542 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4543 \overload indexOf()
4544
4545 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4546*/
4547
4548/*!
4549 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4550
4551 \include qstring.qdocinc negative-index-start-search-from-end
4552
4553 Returns -1 if \a str is not found.
4554
4555 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4556
4557 Example:
4558
4559 \snippet qstring/main.cpp 29
4560
4561 \note When searching for a 0-length \a str, the match at the end of
4562 the data is excluded from the search by a negative \a from, even
4563 though \c{-1} is normally thought of as searching from the end of the
4564 string: the match at the end is \e after the last character, so it is
4565 excluded. To include such a final empty match, either give a positive
4566 value for \a from or omit the \a from parameter entirely.
4567
4568 \sa indexOf(), contains(), count()
4569*/
4570qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4571{
4572 return QtPrivate::lastIndexOf(haystack: QStringView(*this), from, needle: str, cs);
4573}
4574
4575/*!
4576 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4577 \since 6.2
4578 \overload lastIndexOf()
4579
4580 Returns the index position of the last occurrence of the string \a
4581 str in this string. Returns -1 if \a str is not found.
4582
4583 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4584
4585 Example:
4586
4587 \snippet qstring/main.cpp 29
4588
4589 \sa indexOf(), contains(), count()
4590*/
4591
4592
4593/*!
4594 \since 4.5
4595 \overload lastIndexOf()
4596
4597 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4598
4599 \include qstring.qdocinc negative-index-start-search-from-end
4600
4601 Returns -1 if \a str is not found.
4602
4603 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4604
4605 Example:
4606
4607 \snippet qstring/main.cpp 29
4608
4609 \note When searching for a 0-length \a str, the match at the end of
4610 the data is excluded from the search by a negative \a from, even
4611 though \c{-1} is normally thought of as searching from the end of the
4612 string: the match at the end is \e after the last character, so it is
4613 excluded. To include such a final empty match, either give a positive
4614 value for \a from or omit the \a from parameter entirely.
4615
4616 \sa indexOf(), contains(), count()
4617*/
4618qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4619{
4620 return QtPrivate::lastIndexOf(haystack: *this, from, needle: str, cs);
4621}
4622
4623/*!
4624 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4625 \since 6.2
4626 \overload lastIndexOf()
4627
4628 Returns the index position of the last occurrence of the string \a
4629 str in this string. Returns -1 if \a str is not found.
4630
4631 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4632
4633 Example:
4634
4635 \snippet qstring/main.cpp 29
4636
4637 \sa indexOf(), contains(), count()
4638*/
4639
4640/*!
4641 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4642 \overload lastIndexOf()
4643
4644 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4645*/
4646
4647/*!
4648 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4649 \since 6.3
4650 \overload lastIndexOf()
4651*/
4652
4653/*!
4654 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4655 \since 5.14
4656 \overload lastIndexOf()
4657
4658 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4659
4660 \include qstring.qdocinc negative-index-start-search-from-end
4661
4662 Returns -1 if \a str is not found.
4663
4664 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4665
4666 \note When searching for a 0-length \a str, the match at the end of
4667 the data is excluded from the search by a negative \a from, even
4668 though \c{-1} is normally thought of as searching from the end of the
4669 string: the match at the end is \e after the last character, so it is
4670 excluded. To include such a final empty match, either give a positive
4671 value for \a from or omit the \a from parameter entirely.
4672
4673 \sa indexOf(), contains(), count()
4674*/
4675
4676/*!
4677 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4678 \since 6.2
4679 \overload lastIndexOf()
4680
4681 Returns the index position of the last occurrence of the string view \a
4682 str in this string. Returns -1 if \a str is not found.
4683
4684 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4685
4686 \sa indexOf(), contains(), count()
4687*/
4688
4689#if QT_CONFIG(regularexpression)
// Describes one backreference (\1, \2, ...) found in the replacement text
// handed to QString::replace(const QRegularExpression &, const QString &).
struct QStringCapture
{
    qsizetype pos; // index of the backslash inside the replacement string
    qsizetype len; // length of the backreference token: 2 (\N) or 3 (\NN)
    int no;        // number of the capturing group the token refers to
};
Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4697
4698/*!
4699 \overload replace()
4700 \since 5.0
4701
4702 Replaces every occurrence of the regular expression \a re in the
4703 string with \a after. Returns a reference to the string. For
4704 example:
4705
4706 \snippet qstring/main.cpp 87
4707
4708 For regular expressions containing capturing groups,
4709 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4710 with the string captured by the corresponding capturing group.
4711
4712 \snippet qstring/main.cpp 88
4713
4714 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4715*/
4716QString &QString::replace(const QRegularExpression &re, const QString &after)
4717{
4718 if (!re.isValid()) {
4719 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::replace");
4720 return *this;
4721 }
4722
4723 const QString copy(*this);
4724 QRegularExpressionMatchIterator iterator = re.globalMatch(subject: copy);
4725 if (!iterator.hasNext()) // no matches at all
4726 return *this;
4727
4728 reallocData(alloc: d.size, option: QArrayData::KeepSize);
4729
4730 qsizetype numCaptures = re.captureCount();
4731
4732 // 1. build the backreferences list, holding where the backreferences
4733 // are in the replacement string
4734 QVarLengthArray<QStringCapture> backReferences;
4735 const qsizetype al = after.size();
4736 const QChar *ac = after.unicode();
4737
4738 for (qsizetype i = 0; i < al - 1; i++) {
4739 if (ac[i] == u'\\') {
4740 int no = ac[i + 1].digitValue();
4741 if (no > 0 && no <= numCaptures) {
4742 QStringCapture backReference;
4743 backReference.pos = i;
4744 backReference.len = 2;
4745
4746 if (i < al - 2) {
4747 int secondDigit = ac[i + 2].digitValue();
4748 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4749 no = (no * 10) + secondDigit;
4750 ++backReference.len;
4751 }
4752 }
4753
4754 backReference.no = no;
4755 backReferences.append(t: backReference);
4756 }
4757 }
4758 }
4759
4760 // 2. iterate on the matches. For every match, copy in chunks
4761 // - the part before the match
4762 // - the after string, with the proper replacements for the backreferences
4763
4764 qsizetype newLength = 0; // length of the new string, with all the replacements
4765 qsizetype lastEnd = 0;
4766 QVarLengthArray<QStringView> chunks;
4767 const QStringView copyView{ copy }, afterView{ after };
4768 while (iterator.hasNext()) {
4769 QRegularExpressionMatch match = iterator.next();
4770 qsizetype len;
4771 // add the part before the match
4772 len = match.capturedStart() - lastEnd;
4773 if (len > 0) {
4774 chunks << copyView.mid(pos: lastEnd, n: len);
4775 newLength += len;
4776 }
4777
4778 lastEnd = 0;
4779 // add the after string, with replacements for the backreferences
4780 for (const QStringCapture &backReference : std::as_const(t&: backReferences)) {
4781 // part of "after" before the backreference
4782 len = backReference.pos - lastEnd;
4783 if (len > 0) {
4784 chunks << afterView.mid(pos: lastEnd, n: len);
4785 newLength += len;
4786 }
4787
4788 // backreference itself
4789 len = match.capturedLength(nth: backReference.no);
4790 if (len > 0) {
4791 chunks << copyView.mid(pos: match.capturedStart(nth: backReference.no), n: len);
4792 newLength += len;
4793 }
4794
4795 lastEnd = backReference.pos + backReference.len;
4796 }
4797
4798 // add the last part of the after string
4799 len = afterView.size() - lastEnd;
4800 if (len > 0) {
4801 chunks << afterView.mid(pos: lastEnd, n: len);
4802 newLength += len;
4803 }
4804
4805 lastEnd = match.capturedEnd();
4806 }
4807
4808 // 3. trailing string after the last match
4809 if (copyView.size() > lastEnd) {
4810 chunks << copyView.mid(pos: lastEnd);
4811 newLength += copyView.size() - lastEnd;
4812 }
4813
4814 // 4. assemble the chunks together
4815 resize(size: newLength);
4816 qsizetype i = 0;
4817 QChar *uc = data();
4818 for (const QStringView &chunk : std::as_const(t&: chunks)) {
4819 qsizetype len = chunk.size();
4820 memcpy(dest: uc + i, src: chunk.constData(), n: len * sizeof(QChar));
4821 i += len;
4822 }
4823
4824 return *this;
4825}
4826#endif // QT_CONFIG(regularexpression)
4827
4828/*!
4829 Returns the number of (potentially overlapping) occurrences of
4830 the string \a str in this string.
4831
4832 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4833
4834 \sa contains(), indexOf()
4835*/
4836
4837qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4838{
4839 return QtPrivate::count(haystack: QStringView(unicode(), size()), needle: QStringView(str.unicode(), str.size()), cs);
4840}
4841
4842/*!
4843 \overload count()
4844
4845 Returns the number of occurrences of character \a ch in the string.
4846
4847 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4848
4849 \sa contains(), indexOf()
4850*/
4851
4852qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4853{
4854 return QtPrivate::count(haystack: QStringView(unicode(), size()), needle: ch, cs);
4855}
4856
4857/*!
4858 \since 6.0
4859 \overload count()
4860 Returns the number of (potentially overlapping) occurrences of the
4861 string view \a str in this string.
4862
4863 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4864
4865 \sa contains(), indexOf()
4866*/
4867qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4868{
4869 return QtPrivate::count(haystack: *this, needle: str, cs);
4870}
4871
4872/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4873
4874 Returns \c true if this string contains an occurrence of the string
4875 \a str; otherwise returns \c false.
4876
4877 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4878
4879 Example:
4880 \snippet qstring/main.cpp 17
4881
4882 \sa indexOf(), count()
4883*/
4884
4885/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4886 \since 5.3
4887
4888 \overload contains()
4889
    Returns \c true if this string contains an occurrence of the Latin-1 string
4891 \a str; otherwise returns \c false.
4892*/
4893
4894/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4895
4896 \overload contains()
4897
4898 Returns \c true if this string contains an occurrence of the
4899 character \a ch; otherwise returns \c false.
4900*/
4901
4902/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4903 \since 5.14
4904 \overload contains()
4905
4906 Returns \c true if this string contains an occurrence of the string view
4907 \a str; otherwise returns \c false.
4908
4909 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4910
4911 \sa indexOf(), count()
4912*/
4913
4914#if QT_CONFIG(regularexpression)
4915/*!
4916 \since 5.5
4917
4918 Returns the index position of the first match of the regular
4919 expression \a re in the string, searching forward from index
4920 position \a from. Returns -1 if \a re didn't match anywhere.
4921
4922 If the match is successful and \a rmatch is not \nullptr, it also
4923 writes the results of the match into the QRegularExpressionMatch object
4924 pointed to by \a rmatch.
4925
4926 Example:
4927
4928 \snippet qstring/main.cpp 93
4929*/
4930qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4931{
4932 return QtPrivate::indexOf(viewHaystack: QStringView(*this), stringHaystack: this, re, from, rmatch);
4933}
4934
4935/*!
4936 \since 5.5
4937
4938 Returns the index position of the last match of the regular
4939 expression \a re in the string, which starts before the index
4940 position \a from.
4941
4942 \include qstring.qdocinc negative-index-start-search-from-end
4943
4944 Returns -1 if \a re didn't match anywhere.
4945
4946 If the match is successful and \a rmatch is not \nullptr, it also
4947 writes the results of the match into the QRegularExpressionMatch object
4948 pointed to by \a rmatch.
4949
4950 Example:
4951
4952 \snippet qstring/main.cpp 94
4953
4954 \note Due to how the regular expression matching algorithm works,
4955 this function will actually match repeatedly from the beginning of
4956 the string until the position \a from is reached.
4957
4958 \note When searching for a regular expression \a re that may match
4959 0 characters, the match at the end of the data is excluded from the
4960 search by a negative \a from, even though \c{-1} is normally
4961 thought of as searching from the end of the string: the match at
4962 the end is \e after the last character, so it is excluded. To
4963 include such a final empty match, either give a positive value for
4964 \a from or omit the \a from parameter entirely.
4965*/
4966qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4967{
4968 return QtPrivate::lastIndexOf(viewHaystack: QStringView(*this), stringHaystack: this, re, from, rmatch);
4969}
4970
4971/*!
4972 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4973 \since 6.2
4974 \overload lastIndexOf()
4975
4976 Returns the index position of the last match of the regular
4977 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4978
4979 If the match is successful and \a rmatch is not \nullptr, it also
4980 writes the results of the match into the QRegularExpressionMatch object
4981 pointed to by \a rmatch.
4982
4983 Example:
4984
4985 \snippet qstring/main.cpp 94
4986
4987 \note Due to how the regular expression matching algorithm works,
4988 this function will actually match repeatedly from the beginning of
4989 the string until the end of the string is reached.
4990*/
4991
4992/*!
4993 \since 5.1
4994
4995 Returns \c true if the regular expression \a re matches somewhere in this
4996 string; otherwise returns \c false.
4997
4998 If the match is successful and \a rmatch is not \nullptr, it also
4999 writes the results of the match into the QRegularExpressionMatch object
5000 pointed to by \a rmatch.
5001
5002 \sa QRegularExpression::match()
5003*/
5004
5005bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
5006{
5007 return QtPrivate::contains(viewHaystack: QStringView(*this), stringHaystack: this, re, rmatch);
5008}
5009
5010/*!
5011 \overload count()
5012 \since 5.0
5013
5014 Returns the number of times the regular expression \a re matches
5015 in the string.
5016
5017 For historical reasons, this function counts overlapping matches,
5018 so in the example below, there are four instances of "ana" or
5019 "ama":
5020
5021 \snippet qstring/main.cpp 95
5022
5023 This behavior is different from simply iterating over the matches
5024 in the string using QRegularExpressionMatchIterator.
5025
5026 \sa QRegularExpression::globalMatch()
5027*/
5028qsizetype QString::count(const QRegularExpression &re) const
5029{
5030 return QtPrivate::count(haystack: QStringView(*this), re);
5031}
5032#endif // QT_CONFIG(regularexpression)
5033
5034#if QT_DEPRECATED_SINCE(6, 4)
5035/*! \fn qsizetype QString::count() const
5036 \deprecated [6.4] Use size() or length() instead.
5037 \overload count()
5038
5039 Same as size().
5040*/
5041#endif
5042
5043/*!
5044 \enum QString::SectionFlag
5045
5046 This enum specifies flags that can be used to affect various
5047 aspects of the section() function's behavior with respect to
5048 separators and empty fields.
5049
5050 \value SectionDefault Empty fields are counted, leading and
5051 trailing separators are not included, and the separator is
5052 compared case sensitively.
5053
5054 \value SectionSkipEmpty Treat empty fields as if they don't exist,
5055 i.e. they are not considered as far as \e start and \e end are
5056 concerned.
5057
5058 \value SectionIncludeLeadingSep Include the leading separator (if
5059 any) in the result string.
5060
5061 \value SectionIncludeTrailingSep Include the trailing separator
5062 (if any) in the result string.
5063
5064 \value SectionCaseInsensitiveSeps Compare the separator
5065 case-insensitively.
5066
5067 \sa section()
5068*/
5069
5070/*!
5071 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5072
5073 This function returns a section of the string.
5074
5075 This string is treated as a sequence of fields separated by the
5076 character, \a sep. The returned string consists of the fields from
5077 position \a start to position \a end inclusive. If \a end is not
5078 specified, all fields from position \a start to the end of the
5079 string are included. Fields are numbered 0, 1, 2, etc., counting
5080 from the left, and -1, -2, etc., counting from right to left.
5081
5082 The \a flags argument can be used to affect some aspects of the
5083 function's behavior, e.g. whether to be case sensitive, whether
5084 to skip empty fields and how to deal with leading and trailing
5085 separators; see \l{SectionFlags}.
5086
5087 \snippet qstring/main.cpp 52
5088
    If \a start or \a end is negative, we count fields from the right
    of the string, the right-most field being -1, the one to the left
    of the right-most field being -2, and so on.
5092
5093 \snippet qstring/main.cpp 53
5094
5095 \sa split()
5096*/
5097
5098/*!
5099 \overload section()
5100
5101 \snippet qstring/main.cpp 51
5102 \snippet qstring/main.cpp 54
5103
5104 \sa split()
5105*/
5106
5107QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5108{
5109 const QList<QStringView> sections = QStringView{ *this }.split(
5110 sep, behavior: Qt::KeepEmptyParts, cs: (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5111 const qsizetype sectionsSize = sections.size();
5112 if (!(flags & SectionSkipEmpty)) {
5113 if (start < 0)
5114 start += sectionsSize;
5115 if (end < 0)
5116 end += sectionsSize;
5117 } else {
5118 qsizetype skip = 0;
5119 for (qsizetype k = 0; k < sectionsSize; ++k) {
5120 if (sections.at(i: k).isEmpty())
5121 skip++;
5122 }
5123 if (start < 0)
5124 start += sectionsSize - skip;
5125 if (end < 0)
5126 end += sectionsSize - skip;
5127 }
5128 if (start >= sectionsSize || end < 0 || start > end)
5129 return QString();
5130
5131 QString ret;
5132 qsizetype first_i = start, last_i = end;
5133 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5134 const QStringView &section = sections.at(i);
5135 const bool empty = section.isEmpty();
5136 if (x >= start) {
5137 if (x == start)
5138 first_i = i;
5139 if (x == end)
5140 last_i = i;
5141 if (x > start && i > 0)
5142 ret += sep;
5143 ret += section;
5144 }
5145 if (!empty || !(flags & SectionSkipEmpty))
5146 x++;
5147 }
5148 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5149 ret.prepend(s: sep);
5150 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5151 ret += sep;
5152 return ret;
5153}
5154
5155#if QT_CONFIG(regularexpression)
5156class qt_section_chunk {
5157public:
5158 qt_section_chunk() {}
5159 qt_section_chunk(qsizetype l, QStringView s) : length(l), string(std::move(s)) {}
5160 qsizetype length;
5161 QStringView string;
5162};
5163Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5164
5165static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5166 QString::SectionFlags flags)
5167{
5168 const qsizetype sectionsSize = sections.size();
5169
5170 if (!(flags & QString::SectionSkipEmpty)) {
5171 if (start < 0)
5172 start += sectionsSize;
5173 if (end < 0)
5174 end += sectionsSize;
5175 } else {
5176 qsizetype skip = 0;
5177 for (qsizetype k = 0; k < sectionsSize; ++k) {
5178 const qt_section_chunk &section = sections[k];
5179 if (section.length == section.string.size())
5180 skip++;
5181 }
5182 if (start < 0)
5183 start += sectionsSize - skip;
5184 if (end < 0)
5185 end += sectionsSize - skip;
5186 }
5187 if (start >= sectionsSize || end < 0 || start > end)
5188 return QString();
5189
5190 QString ret;
5191 qsizetype x = 0;
5192 qsizetype first_i = start, last_i = end;
5193 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5194 const qt_section_chunk &section = sections[i];
5195 const bool empty = (section.length == section.string.size());
5196 if (x >= start) {
5197 if (x == start)
5198 first_i = i;
5199 if (x == end)
5200 last_i = i;
5201 if (x != start)
5202 ret += section.string;
5203 else
5204 ret += section.string.mid(pos: section.length);
5205 }
5206 if (!empty || !(flags & QString::SectionSkipEmpty))
5207 x++;
5208 }
5209
5210 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5211 const qt_section_chunk &section = sections[first_i];
5212 ret.prepend(v: section.string.left(n: section.length));
5213 }
5214
5215 if ((flags & QString::SectionIncludeTrailingSep)
5216 && last_i < sectionsSize - 1) {
5217 const qt_section_chunk &section = sections[last_i + 1];
5218 ret += section.string.left(n: section.length);
5219 }
5220
5221 return ret;
5222}
5223
5224/*!
5225 \overload section()
5226 \since 5.0
5227
5228 This string is treated as a sequence of fields separated by the
5229 regular expression, \a re.
5230
5231 \snippet qstring/main.cpp 89
5232
5233 \warning Using this QRegularExpression version is much more expensive than
5234 the overloaded string and character versions.
5235
5236 \sa split(), simplified()
5237*/
5238QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5239{
5240 if (!re.isValid()) {
5241 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::section");
5242 return QString();
5243 }
5244
5245 const QChar *uc = unicode();
5246 if (!uc)
5247 return QString();
5248
5249 QRegularExpression sep(re);
5250 if (flags & SectionCaseInsensitiveSeps)
5251 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5252
5253 QVarLengthArray<qt_section_chunk> sections;
5254 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5255 QRegularExpressionMatchIterator iterator = sep.globalMatch(subject: *this);
5256 while (iterator.hasNext()) {
5257 QRegularExpressionMatch match = iterator.next();
5258 m = match.capturedStart();
5259 sections.append(t: qt_section_chunk(last_len, QStringView{ *this }.sliced(pos: last_m, n: m - last_m)));
5260 last_m = m;
5261 last_len = match.capturedLength();
5262 }
5263 sections.append(t: qt_section_chunk(last_len, QStringView{ *this }.sliced(pos: last_m, n: n - last_m)));
5264
5265 return extractSections(sections, start, end, flags);
5266}
5267#endif // QT_CONFIG(regularexpression)
5268
5269/*!
5270 \fn QString QString::left(qsizetype n) const &
5271 \fn QString QString::left(qsizetype n) &&
5272
5273 Returns a substring that contains the \a n leftmost characters
5274 of the string.
5275
5276 If you know that \a n cannot be out of bounds, use first() instead in new
5277 code, because it is faster.
5278
5279 The entire string is returned if \a n is greater than or equal
5280 to size(), or less than zero.
5281
5282 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5283*/
5284
5285/*!
5286 \fn QString QString::right(qsizetype n) const &
5287 \fn QString QString::right(qsizetype n) &&
5288
5289 Returns a substring that contains the \a n rightmost characters
5290 of the string.
5291
5292 If you know that \a n cannot be out of bounds, use last() instead in new
5293 code, because it is faster.
5294
5295 The entire string is returned if \a n is greater than or equal
5296 to size(), or less than zero.
5297
5298 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5299*/
5300
5301/*!
5302 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5303 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5304
5305 Returns a string that contains \a n characters of this string,
5306 starting at the specified \a position index.
5307
5308 If you know that \a position and \a n cannot be out of bounds, use sliced()
5309 instead in new code, because it is faster.
5310
5311 Returns a null string if the \a position index exceeds the
    length of the string. If there are fewer than \a n characters
5313 available in the string starting at the given \a position, or if
5314 \a n is -1 (default), the function returns all characters that
5315 are available from the specified \a position.
5316
5317 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5318*/
5319QString QString::mid(qsizetype position, qsizetype n) const &
5320{
5321 qsizetype p = position;
5322 qsizetype l = n;
5323 using namespace QtPrivate;
5324 switch (QContainerImplHelper::mid(originalLength: size(), position: &p, length: &l)) {
5325 case QContainerImplHelper::Null:
5326 return QString();
5327 case QContainerImplHelper::Empty:
5328 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5329 case QContainerImplHelper::Full:
5330 return *this;
5331 case QContainerImplHelper::Subset:
5332 return sliced(pos: p, n: l);
5333 }
5334 Q_UNREACHABLE_RETURN(QString());
5335}
5336
5337QString QString::mid(qsizetype position, qsizetype n) &&
5338{
5339 qsizetype p = position;
5340 qsizetype l = n;
5341 using namespace QtPrivate;
5342 switch (QContainerImplHelper::mid(originalLength: size(), position: &p, length: &l)) {
5343 case QContainerImplHelper::Null:
5344 return QString();
5345 case QContainerImplHelper::Empty:
5346 resize(size: 0); // keep capacity if we've reserve()d
5347 [[fallthrough]];
5348 case QContainerImplHelper::Full:
5349 return std::move(*this);
5350 case QContainerImplHelper::Subset:
5351 return std::move(*this).sliced(pos: p, n: l);
5352 }
5353 Q_UNREACHABLE_RETURN(QString());
5354}
5355
5356/*!
5357 \fn QString QString::first(qsizetype n) const &
5358 \fn QString QString::first(qsizetype n) &&
5359 \since 6.0
5360
5361 Returns a string that contains the first \a n characters
5362 of this string.
5363
5364 \note The behavior is undefined when \a n < 0 or \a n > size().
5365
5366 \snippet qstring/main.cpp 31
5367
5368 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5369*/
5370
5371/*!
5372 \fn QString QString::last(qsizetype n) const &
5373 \fn QString QString::last(qsizetype n) &&
5374 \since 6.0
5375
5376 Returns the string that contains the last \a n characters of this string.
5377
5378 \note The behavior is undefined when \a n < 0 or \a n > size().
5379
5380 \snippet qstring/main.cpp 48
5381
5382 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5383*/
5384
5385/*!
5386 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5387 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5388 \since 6.0
5389
5390 Returns a string that contains \a n characters of this string,
5391 starting at position \a pos.
5392
5393 \note The behavior is undefined when \a pos < 0, \a n < 0,
5394 or \a pos + \a n > size().
5395
5396 \snippet qstring/main.cpp 34
5397
5398 \sa first(), last(), chopped(), chop(), truncate(), slice()
5399*/
5400QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5401{
5402 if (n == 0)
5403 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5404 DataPointer d = std::move(str.d).sliced(pos, n);
5405 d.data()[n] = 0;
5406 return QString(std::move(d));
5407}
5408
5409/*!
5410 \fn QString QString::sliced(qsizetype pos) const &
5411 \fn QString QString::sliced(qsizetype pos) &&
5412 \since 6.0
5413 \overload
5414
5415 Returns a string that contains the portion of this string starting at
5416 position \a pos and extending to its end.
5417
5418 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5419
5420 \sa first(), last(), chopped(), chop(), truncate(), slice()
5421*/
5422
5423/*!
5424 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5425 \since 6.8
5426
5427 Modifies this string to start at position \a pos, extending for \a n
    characters (UTF-16 code units), and returns a reference to this string.
5429
5430 \note The behavior is undefined if \a pos < 0, \a n < 0,
5431 or \a pos + \a n > size().
5432
5433 \snippet qstring/main.cpp slice97
5434
5435 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5436*/
5437
5438/*!
5439 \fn QString &QString::slice(qsizetype pos)
5440 \since 6.8
5441 \overload
5442
5443 Modifies this string to start at position \a pos and extending to its end,
5444 and returns a reference to this string.
5445
5446 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5447
5448 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5449*/
5450
5451/*!
5452 \fn QString QString::chopped(qsizetype len) const &
5453 \fn QString QString::chopped(qsizetype len) &&
5454 \since 5.10
5455
5456 Returns a string that contains the size() - \a len leftmost characters
5457 of this string.
5458
5459 \note The behavior is undefined if \a len is negative or greater than size().
5460
5461 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5462*/
5463
5464/*!
5465 Returns \c true if the string starts with \a s; otherwise returns
5466 \c false.
5467
5468 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5469
5470 \snippet qstring/main.cpp 65
5471
5472 \sa endsWith()
5473*/
5474bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5475{
5476 return qt_starts_with_impl(haystack: QStringView(*this), needle: QStringView(s), cs);
5477}
5478
5479/*!
5480 \overload startsWith()
5481 */
5482bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5483{
5484 return qt_starts_with_impl(haystack: QStringView(*this), needle: s, cs);
5485}
5486
5487/*!
5488 \overload startsWith()
5489
5490 Returns \c true if the string starts with \a c; otherwise returns
5491 \c false.
5492*/
5493bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5494{
5495 if (!size())
5496 return false;
5497 if (cs == Qt::CaseSensitive)
5498 return at(i: 0) == c;
5499 return foldCase(ch: at(i: 0)) == foldCase(ch: c);
5500}
5501
5502/*!
5503 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5504 \since 5.10
5505 \overload
5506
5507 Returns \c true if the string starts with the string view \a str;
5508 otherwise returns \c false.
5509
5510 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5511
5512 \sa endsWith()
5513*/
5514
5515/*!
5516 Returns \c true if the string ends with \a s; otherwise returns
5517 \c false.
5518
5519 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5520
5521 \snippet qstring/main.cpp 20
5522
5523 \sa startsWith()
5524*/
5525bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5526{
5527 return qt_ends_with_impl(haystack: QStringView(*this), needle: QStringView(s), cs);
5528}
5529
5530/*!
5531 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5532 \since 5.10
5533 \overload endsWith()
5534 Returns \c true if the string ends with the string view \a str;
5535 otherwise returns \c false.
5536
5537 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5538
5539 \sa startsWith()
5540*/
5541
5542/*!
5543 \overload endsWith()
5544*/
5545bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5546{
5547 return qt_ends_with_impl(haystack: QStringView(*this), needle: s, cs);
5548}
5549
5550/*!
5551 Returns \c true if the string ends with \a c; otherwise returns
5552 \c false.
5553
5554 \overload endsWith()
5555 */
5556bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5557{
5558 if (!size())
5559 return false;
5560 if (cs == Qt::CaseSensitive)
5561 return at(i: size() - 1) == c;
5562 return foldCase(ch: at(i: size() - 1)) == foldCase(ch: c);
5563}
5564
5565static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5566{
5567 QStringIterator it(s);
5568 while (it.hasNext()) {
5569 const char32_t uc = it.next();
5570 if (qGetProp(ucs4: uc)->cases[c].diff)
5571 return false;
5572 }
5573 return true;
5574}
5575
5576bool QtPrivate::isLower(QStringView s) noexcept
5577{
5578 return checkCase(s, c: QUnicodeTables::LowerCase);
5579}
5580
5581bool QtPrivate::isUpper(QStringView s) noexcept
5582{
5583 return checkCase(s, c: QUnicodeTables::UpperCase);
5584}
5585
5586/*!
5587 Returns \c true if the string is uppercase, that is, it's identical
5588 to its toUpper() folding.
5589
5590 Note that this does \e not mean that the string does not contain
    lowercase letters (some lowercase letters do not have an uppercase
5592 folding; they are left unchanged by toUpper()).
5593 For more information, refer to the Unicode standard, section 3.13.
5594
5595 \since 5.12
5596
5597 \sa QChar::toUpper(), isLower()
5598*/
5599bool QString::isUpper() const
5600{
5601 return QtPrivate::isUpper(s: qToStringViewIgnoringNull(s: *this));
5602}
5603
5604/*!
5605 Returns \c true if the string is lowercase, that is, it's identical
5606 to its toLower() folding.
5607
5608 Note that this does \e not mean that the string does not contain
5609 uppercase letters (some uppercase letters do not have a lowercase
5610 folding; they are left unchanged by toLower()).
5611 For more information, refer to the Unicode standard, section 3.13.
5612
5613 \since 5.12
5614
5615 \sa QChar::toLower(), isUpper()
5616 */
5617bool QString::isLower() const
5618{
5619 return QtPrivate::isLower(s: qToStringViewIgnoringNull(s: *this));
5620}
5621
5622static QByteArray qt_convert_to_latin1(QStringView string);
5623
5624QByteArray QString::toLatin1_helper(const QString &string)
5625{
5626 return qt_convert_to_latin1(string);
5627}
5628
5629/*!
5630 \since 6.0
5631 \internal
5632 \relates QAnyStringView
5633
5634 Returns a UTF-16 representation of \a string as a QString.
5635
5636 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5637 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5638*/
5639QString QtPrivate::convertToQString(QAnyStringView string)
5640{
5641 return string.visit(v: [] (auto string) { return string.toString(); });
5642}
5643
5644/*!
5645 \since 5.10
5646 \internal
5647 \relates QStringView
5648
5649 Returns a Latin-1 representation of \a string as a QByteArray.
5650
5651 The behavior is undefined if \a string contains non-Latin1 characters.
5652
5653 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5654 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5655*/
5656QByteArray QtPrivate::convertToLatin1(QStringView string)
5657{
5658 return qt_convert_to_latin1(string);
5659}
5660
5661Q_NEVER_INLINE
5662static QByteArray qt_convert_to_latin1(QStringView string)
5663{
5664 if (Q_UNLIKELY(string.isNull()))
5665 return QByteArray();
5666
5667 QByteArray ba(string.size(), Qt::Uninitialized);
5668
5669 // since we own the only copy, we're going to const_cast the constData;
5670 // that avoids an unnecessary call to detach() and expansion code that will never get used
5671 qt_to_latin1(dst: reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5672 src: string.utf16(), length: string.size());
5673 return ba;
5674}
5675
5676QByteArray QString::toLatin1_helper_inplace(QString &s)
5677{
5678 if (!s.isDetached())
5679 return qt_convert_to_latin1(string: s);
5680
5681 // We can return our own buffer to the caller.
5682 // Conversion to Latin-1 always shrinks the buffer by half.
5683 // This relies on the fact that we use QArrayData for everything behind the scenes
5684
5685 // First, do the in-place conversion. Since isDetached() == true, the data
5686 // was allocated by QArrayData, so the null terminator must be there.
5687 qsizetype length = s.size();
5688 char16_t *sdata = s.d->data();
5689 Q_ASSERT(sdata[length] == u'\0');
5690 qt_to_latin1(dst: reinterpret_cast<uchar *>(sdata), src: sdata, length: length + 1);
5691
5692 // Move the internals over to the byte array.
5693 // Kids, avert your eyes. Don't try this at home.
5694 auto ba_d = std::move(s.d).reinterpreted<char>();
5695
5696 // Some sanity checks
5697 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5698 Q_ASSERT(s.isNull());
5699 Q_ASSERT(s.isEmpty());
5700 Q_ASSERT(s.constData() == QString().constData());
5701
5702 return QByteArray(std::move(ba_d));
5703}
5704
5705// QLatin1 methods that use helpers from qstring.cpp
5706char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5707{
5708 const qsizetype len = in.size();
5709 qt_from_latin1(dst: out, str: in.data(), size: len);
5710 return std::next(x: out, n: len);
5711}
5712
5713char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5714{
5715 const qsizetype len = in.size();
5716 qt_to_latin1(dst: reinterpret_cast<uchar *>(out), src: in.utf16(), length: len);
5717 return out + len;
5718}
5719
5720/*!
5721 \fn QByteArray QString::toLatin1() const
5722
5723 Returns a Latin-1 representation of the string as a QByteArray.
5724
5725 The returned byte array is undefined if the string contains non-Latin1
5726 characters. Those characters may be suppressed or replaced with a
5727 question mark.
5728
5729 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5730*/
5731
// Forward declaration: the definition follows QString::toLocal8Bit_helper()
// below, which is its first caller.
static QByteArray qt_convert_to_local_8bit(QStringView string);
5733
5734/*!
5735 \fn QByteArray QString::toLocal8Bit() const
5736
5737 Returns the local 8-bit representation of the string as a
5738 QByteArray.
5739
5740 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5741
5742 If this string contains any characters that cannot be encoded in the
5743 local 8-bit encoding, the returned byte array is undefined. Those
5744 characters may be suppressed or replaced by another.
5745
5746 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5747*/
5748
5749QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5750{
5751 return qt_convert_to_local_8bit(string: QStringView(data, size));
5752}
5753
5754static QByteArray qt_convert_to_local_8bit(QStringView string)
5755{
5756 if (string.isNull())
5757 return QByteArray();
5758 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5759 return fromUtf16(string);
5760}
5761
5762/*!
5763 \since 5.10
5764 \internal
5765 \relates QStringView
5766
5767 Returns a local 8-bit representation of \a string as a QByteArray.
5768
    On Unix systems this is equivalent to toUtf8(); on Windows the system's
    current code page is used.
5771
5772 The behavior is undefined if \a string contains characters not
5773 supported by the locale's 8-bit encoding.
5774
5775 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5776*/
5777QByteArray QtPrivate::convertToLocal8Bit(QStringView string)
5778{
5779 return qt_convert_to_local_8bit(string);
5780}
5781
// Forward declaration: the definition follows QString::toUtf8_helper()
// below, which is its first caller.
static QByteArray qt_convert_to_utf8(QStringView str);
5783
5784/*!
5785 \fn QByteArray QString::toUtf8() const
5786
5787 Returns a UTF-8 representation of the string as a QByteArray.
5788
5789 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5790 string like QString.
5791
5792 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5793*/
5794
5795QByteArray QString::toUtf8_helper(const QString &str)
5796{
5797 return qt_convert_to_utf8(str);
5798}
5799
5800static QByteArray qt_convert_to_utf8(QStringView str)
5801{
5802 if (str.isNull())
5803 return QByteArray();
5804
5805 return QUtf8::convertFromUnicode(in: str);
5806}
5807
5808/*!
5809 \since 5.10
5810 \internal
5811 \relates QStringView
5812
5813 Returns a UTF-8 representation of \a string as a QByteArray.
5814
5815 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5816 string like QStringView.
5817
5818 \sa QString::toUtf8(), QStringView::toUtf8()
5819*/
5820QByteArray QtPrivate::convertToUtf8(QStringView string)
5821{
5822 return qt_convert_to_utf8(str: string);
5823}
5824
// Forward declaration: the definition follows QString::toUcs4() below,
// which is its first caller.
static QList<uint> qt_convert_to_ucs4(QStringView string);
5826
5827/*!
5828 \since 4.2
5829
5830 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5831
5832 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5833 this string will be encoded in UTF-32. Any invalid sequence of code units in
5834 this string is replaced by the Unicode replacement character
5835 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5836
5837 The returned list is not 0-terminated.
5838
5839 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5840 fromUcs4(), toWCharArray()
5841*/
5842QList<uint> QString::toUcs4() const
5843{
5844 return qt_convert_to_ucs4(string: *this);
5845}
5846
5847static QList<uint> qt_convert_to_ucs4(QStringView string)
5848{
5849 QList<uint> v(string.size());
5850 uint *a = const_cast<uint*>(v.constData());
5851 QStringIterator it(string);
5852 while (it.hasNext())
5853 *a++ = it.next();
5854 v.resize(size: a - v.constData());
5855 return v;
5856}
5857
5858/*!
5859 \since 5.10
5860 \internal
5861 \relates QStringView
5862
5863 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5864
5865 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5866 this string will be encoded in UTF-32. Any invalid sequence of code units in
5867 this string is replaced by the Unicode replacement character
5868 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5869
5870 The returned list is not 0-terminated.
5871
5872 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5873 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5874*/
5875QList<uint> QtPrivate::convertToUcs4(QStringView string)
5876{
5877 return qt_convert_to_ucs4(string);
5878}
5879
5880/*!
5881 \fn QString QString::fromLatin1(QByteArrayView str)
5882 \overload
5883 \since 6.0
5884
5885 Returns a QString initialized with the Latin-1 string \a str.
5886
5887 \note: any null ('\\0') bytes in the byte array will be included in this
5888 string, converted to Unicode null characters (U+0000).
5889*/
5890QString QString::fromLatin1(QByteArrayView ba)
5891{
5892 DataPointer d;
5893 if (!ba.data()) {
5894 // nothing to do
5895 } else if (ba.size() == 0) {
5896 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
5897 } else {
5898 d = DataPointer(ba.size(), ba.size());
5899 Q_CHECK_PTR(d.data());
5900 d.data()[ba.size()] = '\0';
5901 char16_t *dst = d.data();
5902
5903 qt_from_latin1(dst, str: ba.data(), size: size_t(ba.size()));
5904 }
5905 return QString(std::move(d));
5906}
5907
5908/*!
5909 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5910 Returns a QString initialized with the first \a size characters
5911 of the Latin-1 string \a str.
5912
5913 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5914
5915 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5916*/
5917
5918/*!
5919 \fn QString QString::fromLatin1(const QByteArray &str)
5920 \overload
5921 \since 5.0
5922
5923 Returns a QString initialized with the Latin-1 string \a str.
5924
5925 \note: any null ('\\0') bytes in the byte array will be included in this
5926 string, converted to Unicode null characters (U+0000). This behavior is
5927 different from Qt 5.x.
5928*/
5929
5930/*!
5931 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5932 Returns a QString initialized with the first \a size characters
5933 of the 8-bit string \a str.
5934
5935 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5936
5937 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5938
5939 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5940*/
5941
5942/*!
5943 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5944 \overload
5945 \since 5.0
5946
5947 Returns a QString initialized with the 8-bit string \a str.
5948
5949 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5950
5951 \note: any null ('\\0') bytes in the byte array will be included in this
5952 string, converted to Unicode null characters (U+0000). This behavior is
5953 different from Qt 5.x.
5954*/
5955
5956/*!
5957 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5958 \overload
5959 \since 6.0
5960
5961 Returns a QString initialized with the 8-bit string \a str.
5962
5963 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5964
5965 \note: any null ('\\0') bytes in the byte array will be included in this
5966 string, converted to Unicode null characters (U+0000).
5967*/
5968QString QString::fromLocal8Bit(QByteArrayView ba)
5969{
5970 if (ba.isNull())
5971 return QString();
5972 if (ba.isEmpty())
5973 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5974 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5975 return toUtf16(ba);
5976}
5977
5978/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5979 Returns a QString initialized with the first \a size bytes
5980 of the UTF-8 string \a str.
5981
5982 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5983
5984 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5985 string like QString. However, invalid sequences are possible with UTF-8
5986 and, if any such are found, they will be replaced with one or more
5987 "replacement characters", or suppressed. These include non-Unicode
5988 sequences, non-characters, overlong sequences or surrogate codepoints
5989 encoded into UTF-8.
5990
5991 This function can be used to process incoming data incrementally as long as
5992 all UTF-8 characters are terminated within the incoming data. Any
5993 unterminated characters at the end of the string will be replaced or
5994 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5995
5996 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5997*/
5998
5999/*!
6000 \fn QString QString::fromUtf8(const char8_t *str)
6001 \overload
6002 \since 6.1
6003
6004 This overload is only available when compiling in C++20 mode.
6005*/
6006
6007/*!
6008 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
6009 \overload
6010 \since 6.0
6011
6012 This overload is only available when compiling in C++20 mode.
6013*/
6014
6015/*!
6016 \fn QString QString::fromUtf8(const QByteArray &str)
6017 \overload
6018 \since 5.0
6019
6020 Returns a QString initialized with the UTF-8 string \a str.
6021
6022 \note: any null ('\\0') bytes in the byte array will be included in this
6023 string, converted to Unicode null characters (U+0000). This behavior is
6024 different from Qt 5.x.
6025*/
6026
6027/*!
6028 \fn QString QString::fromUtf8(QByteArrayView str)
6029 \overload
6030 \since 6.0
6031
6032 Returns a QString initialized with the UTF-8 string \a str.
6033
6034 \note: any null ('\\0') bytes in the byte array will be included in this
6035 string, converted to Unicode null characters (U+0000).
6036*/
6037QString QString::fromUtf8(QByteArrayView ba)
6038{
6039 if (ba.isNull())
6040 return QString();
6041 if (ba.isEmpty())
6042 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
6043 return QUtf8::convertToUnicode(in: ba);
6044}
6045
6046#ifndef QT_BOOTSTRAPPED
6047/*!
6048 \since 5.3
6049 Returns a QString initialized with the first \a size characters
6050 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6051
6052 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6053
6054 This function checks for a Byte Order Mark (BOM). If it is missing,
6055 host byte order is assumed.
6056
6057 This function is slow compared to the other Unicode conversions.
6058 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6059
6060 QString makes a deep copy of the Unicode data.
6061
6062 \sa utf16(), setUtf16(), fromStdU16String()
6063*/
6064QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6065{
6066 if (!unicode)
6067 return QString();
6068 if (size < 0)
6069 size = QtPrivate::qustrlen(str: unicode);
6070 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6071 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6072}
6073
6074/*!
6075 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6076 \deprecated [6.0] Use the \c char16_t overload instead.
6077*/
6078
6079/*!
6080 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6081 \since 4.2
6082 \deprecated [6.0] Use the \c char32_t overload instead.
6083*/
6084
6085/*!
6086 \since 5.3
6087
6088 Returns a QString initialized with the first \a size characters
6089 of the Unicode string \a unicode (encoded as UTF-32).
6090
6091 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6092
6093 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6094 fromStdU32String()
6095*/
6096QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6097{
6098 if (!unicode)
6099 return QString();
6100 if (size < 0) {
6101 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6102 size = wcslen(s: reinterpret_cast<const wchar_t *>(unicode));
6103 else
6104 size = std::char_traits<char32_t>::length(s: unicode);
6105 }
6106 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6107 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6108}
6109#endif // !QT_BOOTSTRAPPED
6110
6111/*!
6112 Resizes the string to \a size characters and copies \a unicode
6113 into the string.
6114
6115 If \a unicode is \nullptr, nothing is copied, but the string is still
6116 resized to \a size.
6117
6118 \sa unicode(), setUtf16()
6119*/
6120QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6121{
6122 resize(size);
6123 if (unicode && size)
6124 memcpy(dest: d.data(), src: unicode, n: size * sizeof(QChar));
6125 return *this;
6126}
6127
6128/*!
6129 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6130
6131 Resizes the string to \a size characters and copies \a unicode
6132 into the string.
6133
6134 If \a unicode is \nullptr, nothing is copied, but the string is still
6135 resized to \a size.
6136
6137 Note that unlike fromUtf16(), this function does not consider BOMs and
6138 possibly differing byte ordering.
6139
6140 \sa utf16(), setUnicode()
6141*/
6142
6143/*!
6144 \fn QString QString::simplified() const
6145
6146 Returns a string that has whitespace removed from the start
6147 and the end, and that has each sequence of internal whitespace
6148 replaced with a single space.
6149
6150 Whitespace means any character for which QChar::isSpace() returns
6151 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6152 '\\f', '\\r', and ' '.
6153
6154 Example:
6155
6156 \snippet qstring/main.cpp 57
6157
6158 \sa trimmed()
6159*/
6160QString QString::simplified_helper(const QString &str)
6161{
6162 return QStringAlgorithms<const QString>::simplified_helper(str);
6163}
6164
6165QString QString::simplified_helper(QString &str)
6166{
6167 return QStringAlgorithms<QString>::simplified_helper(str);
6168}
6169
6170namespace {
6171 template <typename StringView>
6172 StringView qt_trimmed(StringView s) noexcept
6173 {
6174 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6175 return StringView{begin, end};
6176 }
6177}
6178
6179/*!
6180 \fn QStringView QtPrivate::trimmed(QStringView s)
6181 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6182 \internal
6183 \relates QStringView
6184 \since 5.10
6185
6186 Returns \a s with whitespace removed from the start and the end.
6187
6188 Whitespace means any character for which QChar::isSpace() returns
6189 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6190 '\\f', '\\r', and ' '.
6191
6192 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6193*/
6194QStringView QtPrivate::trimmed(QStringView s) noexcept
6195{
6196 return qt_trimmed(s);
6197}
6198
6199QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6200{
6201 return qt_trimmed(s);
6202}
6203
6204/*!
6205 \fn QString QString::trimmed() const
6206
6207 Returns a string that has whitespace removed from the start and
6208 the end.
6209
6210 Whitespace means any character for which QChar::isSpace() returns
6211 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6212 '\\f', '\\r', and ' '.
6213
6214 Example:
6215
6216 \snippet qstring/main.cpp 82
6217
6218 Unlike simplified(), trimmed() leaves internal whitespace alone.
6219
6220 \sa simplified()
6221*/
6222QString QString::trimmed_helper(const QString &str)
6223{
6224 return QStringAlgorithms<const QString>::trimmed_helper(str);
6225}
6226
6227QString QString::trimmed_helper(QString &str)
6228{
6229 return QStringAlgorithms<QString>::trimmed_helper(str);
6230}
6231
6232/*! \fn const QChar QString::at(qsizetype position) const
6233
6234 Returns the character at the given index \a position in the
6235 string.
6236
6237 The \a position must be a valid index position in the string
6238 (i.e., 0 <= \a position < size()).
6239
6240 \sa operator[]()
6241*/
6242
6243/*!
6244 \fn QChar &QString::operator[](qsizetype position)
6245
6246 Returns the character at the specified \a position in the string as a
6247 modifiable reference.
6248
6249 Example:
6250
6251 \snippet qstring/main.cpp 85
6252
6253 \sa at()
6254*/
6255
6256/*!
6257 \fn const QChar QString::operator[](qsizetype position) const
6258
6259 \overload operator[]()
6260*/
6261
6262/*!
6263 \fn QChar QString::front() const
6264 \since 5.10
6265
6266 Returns the first character in the string.
6267 Same as \c{at(0)}.
6268
6269 This function is provided for STL compatibility.
6270
6271 \warning Calling this function on an empty string constitutes
6272 undefined behavior.
6273
6274 \sa back(), at(), operator[]()
6275*/
6276
6277/*!
6278 \fn QChar QString::back() const
6279 \since 5.10
6280
6281 Returns the last character in the string.
6282 Same as \c{at(size() - 1)}.
6283
6284 This function is provided for STL compatibility.
6285
6286 \warning Calling this function on an empty string constitutes
6287 undefined behavior.
6288
6289 \sa front(), at(), operator[]()
6290*/
6291
6292/*!
6293 \fn QChar &QString::front()
6294 \since 5.10
6295
6296 Returns a reference to the first character in the string.
6297 Same as \c{operator[](0)}.
6298
6299 This function is provided for STL compatibility.
6300
6301 \warning Calling this function on an empty string constitutes
6302 undefined behavior.
6303
6304 \sa back(), at(), operator[]()
6305*/
6306
6307/*!
6308 \fn QChar &QString::back()
6309 \since 5.10
6310
6311 Returns a reference to the last character in the string.
6312 Same as \c{operator[](size() - 1)}.
6313
6314 This function is provided for STL compatibility.
6315
6316 \warning Calling this function on an empty string constitutes
6317 undefined behavior.
6318
6319 \sa front(), at(), operator[]()
6320*/
6321
6322/*!
6323 \fn void QString::truncate(qsizetype position)
6324
6325 Truncates the string at the given \a position index.
6326
6327 If the specified \a position index is beyond the end of the
6328 string, nothing happens.
6329
6330 Example:
6331
6332 \snippet qstring/main.cpp 83
6333
6334 If \a position is negative, it is equivalent to passing zero.
6335
6336 \sa chop(), resize(), first(), QStringView::truncate()
6337*/
6338
6339void QString::truncate(qsizetype pos)
6340{
6341 if (pos < size())
6342 resize(size: pos);
6343}
6344
6345
6346/*!
6347 Removes \a n characters from the end of the string.
6348
6349 If \a n is greater than or equal to size(), the result is an
6350 empty string; if \a n is negative, it is equivalent to passing zero.
6351
6352 Example:
6353 \snippet qstring/main.cpp 15
6354
6355 If you want to remove characters from the \e beginning of the
6356 string, use remove() instead.
6357
6358 \sa truncate(), resize(), remove(), QStringView::chop()
6359*/
6360void QString::chop(qsizetype n)
6361{
6362 if (n > 0)
6363 resize(size: d.size - n);
6364}
6365
6366/*!
6367 Sets every character in the string to character \a ch. If \a size
6368 is different from -1 (default), the string is resized to \a
6369 size beforehand.
6370
6371 Example:
6372
6373 \snippet qstring/main.cpp 21
6374
6375 \sa resize()
6376*/
6377
6378QString& QString::fill(QChar ch, qsizetype size)
6379{
6380 resize(size: size < 0 ? d.size : size);
6381 if (d.size)
6382 std::fill(first: d.data(), last: d.data() + d.size, value: ch.unicode());
6383 return *this;
6384}
6385
6386/*!
6387 \fn qsizetype QString::length() const
6388
6389 Returns the number of characters in this string. Equivalent to
6390 size().
6391
6392 \sa resize()
6393*/
6394
6395/*!
6396 \fn qsizetype QString::size() const
6397
6398 Returns the number of characters in this string.
6399
6400 The last character in the string is at position size() - 1.
6401
6402 Example:
6403 \snippet qstring/main.cpp 58
6404
6405 \sa isEmpty(), resize()
6406*/
6407
6408/*!
6409 \fn qsizetype QString::max_size() const
6410 \fn qsizetype QString::maxSize()
6411 \since 6.8
6412
6413 It returns the maximum number of elements that the string can
6414 theoretically hold. In practice, the number can be much smaller,
6415 limited by the amount of memory available to the system.
6416*/
6417
6418/*! \fn bool QString::isNull() const
6419
6420 Returns \c true if this string is null; otherwise returns \c false.
6421
6422 Example:
6423
6424 \snippet qstring/main.cpp 28
6425
6426 Qt makes a distinction between null strings and empty strings for
6427 historical reasons. For most applications, what matters is
6428 whether or not a string contains any data, and this can be
6429 determined using the isEmpty() function.
6430
6431 \sa isEmpty()
6432*/
6433
6434/*! \fn bool QString::isEmpty() const
6435
6436 Returns \c true if the string has no characters; otherwise returns
6437 \c false.
6438
6439 Example:
6440
6441 \snippet qstring/main.cpp 27
6442
6443 \sa size()
6444*/
6445
6446/*! \fn QString &QString::operator+=(const QString &other)
6447
6448 Appends the string \a other onto the end of this string and
6449 returns a reference to this string.
6450
6451 Example:
6452
6453 \snippet qstring/main.cpp 84
6454
6455 This operation is typically very fast (\l{constant time}),
6456 because QString preallocates extra space at the end of the string
6457 data so it can grow without reallocating the entire string each
6458 time.
6459
6460 \sa append(), prepend()
6461*/
6462
6463/*! \fn QString &QString::operator+=(QLatin1StringView str)
6464
6465 \overload operator+=()
6466
6467 Appends the Latin-1 string viewed by \a str to this string.
6468*/
6469
6470/*! \fn QString &QString::operator+=(QUtf8StringView str)
6471 \since 6.5
6472 \overload operator+=()
6473
6474 Appends the UTF-8 string view \a str to this string.
6475*/
6476
6477/*! \fn QString &QString::operator+=(const QByteArray &ba)
6478
6479 \overload operator+=()
6480
6481 Appends the byte array \a ba to this string. The byte array is converted
6482 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6483 are embedded in the \a ba byte array, they will be included in the
6484 transformation.
6485
6486 You can disable this function by defining
6487 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6488 can be useful if you want to ensure that all user-visible strings
6489 go through QObject::tr(), for example.
6490*/
6491
6492/*! \fn QString &QString::operator+=(const char *str)
6493
6494 \overload operator+=()
6495
6496 Appends the string \a str to this string. The const char pointer
6497 is converted to Unicode using the fromUtf8() function.
6498
6499 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6500 when you compile your applications. This can be useful if you want
6501 to ensure that all user-visible strings go through QObject::tr(),
6502 for example.
6503*/
6504
6505/*! \fn QString &QString::operator+=(QStringView str)
6506 \since 6.0
6507 \overload operator+=()
6508
6509 Appends the string view \a str to this string.
6510*/
6511
6512/*! \fn QString &QString::operator+=(QChar ch)
6513
6514 \overload operator+=()
6515
6516 Appends the character \a ch to the string.
6517*/
6518
6519/*!
6520 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6521
6522 \overload operator==()
6523
6524 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6525 Note that no string is equal to \a lhs being 0.
6526
6527 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6528*/
6529
6530/*!
6531 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6532
6533 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6534 \c false.
6535
6536 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6537 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6538*/
6539
6540/*!
6541 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6542
6543 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6544 returns \c false. For \a lhs != 0, this is equivalent to \c
6545 {compare(lhs, rhs) < 0}.
6546
6547 \sa {Comparing Strings}
6548*/
6549
6550/*!
6551 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6552
6553 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6554 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6555 {compare(lhs, rhs) <= 0}.
6556
6557 \sa {Comparing Strings}
6558*/
6559
6560/*!
6561 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6562
6563 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6564 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6565
6566 \sa {Comparing Strings}
6567*/
6568
6569/*!
6570 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6571
6572 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6573 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6574 {compare(lhs, rhs) >= 0}.
6575
6576 \sa {Comparing Strings}
6577*/
6578
6579/*!
6580 \fn QString operator+(const QString &s1, const QString &s2)
6581 \fn QString operator+(QString &&s1, const QString &s2)
6582 \relates QString
6583
6584 Returns a string which is the result of concatenating \a s1 and \a
6585 s2.
6586*/
6587
6588/*!
6589 \fn QString operator+(const QString &s1, const char *s2)
6590 \relates QString
6591
6592 Returns a string which is the result of concatenating \a s1 and \a
6593 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6594 function).
6595
6596 \sa QString::fromUtf8()
6597*/
6598
6599/*!
6600 \fn QString operator+(const char *s1, const QString &s2)
6601 \relates QString
6602
6603 Returns a string which is the result of concatenating \a s1 and \a
6604 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6605 function).
6606
6607 \sa QString::fromUtf8()
6608*/
6609
6610/*!
6611 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6612 \since 4.2
6613
6614 Compares the string \a s1 with the string \a s2 and returns a negative integer
6615 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6616 and zero if they are equal.
6617
6618 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6619
6620 Case sensitive comparison is based exclusively on the numeric
6621 Unicode values of the characters and is very fast, but is not what
6622 a human would expect. Consider sorting user-visible strings with
6623 localeAwareCompare().
6624
6625 \snippet qstring/main.cpp 16
6626
6627//! [compare-isNull-vs-isEmpty]
6628 \note This function treats null strings the same as empty strings,
6629 for more details see \l {Distinction Between Null and Empty Strings}.
6630//! [compare-isNull-vs-isEmpty]
6631
6632 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6633*/
6634
6635/*!
6636 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6637 \since 4.2
6638 \overload compare()
6639
6640 Performs a comparison of \a s1 and \a s2, using the case
6641 sensitivity setting \a cs.
6642*/
6643
6644/*!
6645 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6646
6647 \since 4.2
6648 \overload compare()
6649
6650 Performs a comparison of \a s1 and \a s2, using the case
6651 sensitivity setting \a cs.
6652*/
6653
6654/*!
6655 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6656
6657 \since 5.12
6658 \overload compare()
6659
6660 Performs a comparison of this with \a s, using the case
6661 sensitivity setting \a cs.
6662*/
6663
6664/*!
6665 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6666
6667 \since 5.14
6668 \overload compare()
6669
6670 Performs a comparison of this with \a ch, using the case
6671 sensitivity setting \a cs.
6672*/
6673
6674/*!
6675 \overload compare()
6676 \since 4.2
6677
6678 Lexically compares this string with the string \a other and returns
6679 a negative integer if this string is less than \a other, a positive
6680 integer if it is greater than \a other, and zero if they are equal.
6681
6682 Same as compare(*this, \a other, \a cs).
6683*/
6684int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6685{
6686 return QtPrivate::compareStrings(lhs: *this, rhs: other, cs);
6687}
6688
6689/*!
6690 \internal
6691 \since 4.5
6692*/
6693int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6694 Qt::CaseSensitivity cs) noexcept
6695{
6696 Q_ASSERT(length1 >= 0);
6697 Q_ASSERT(length2 >= 0);
6698 Q_ASSERT(data1 || length1 == 0);
6699 Q_ASSERT(data2 || length2 == 0);
6700 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: QStringView(data2, length2), cs);
6701}
6702
6703/*!
6704 \overload compare()
6705 \since 4.2
6706
6707 Same as compare(*this, \a other, \a cs).
6708*/
6709int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6710{
6711 return QtPrivate::compareStrings(lhs: *this, rhs: other, cs);
6712}
6713
6714/*!
6715 \internal
6716 \since 5.0
6717*/
6718int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6719 Qt::CaseSensitivity cs) noexcept
6720{
6721 Q_ASSERT(length1 >= 0);
6722 Q_ASSERT(data1 || length1 == 0);
6723 if (!data2)
6724 return qt_lencmp(lhs: length1, rhs: 0);
6725 if (Q_UNLIKELY(length2 < 0))
6726 length2 = qsizetype(strlen(s: data2));
6727 return QtPrivate::compareStrings(lhs: QStringView(data1, length1),
6728 rhs: QUtf8StringView(data2, length2), cs);
6729}
6730
6731/*!
6732 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6733 \overload compare()
6734*/
6735
6736/*!
6737 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6738 \overload compare()
6739*/
6740
6741bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6742{
6743 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6744}
6745
6746Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6747{
6748 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6749 return Qt::compareThreeWay(lhs: res, rhs: 0);
6750}
6751
6752bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6753{
6754 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6755}
6756
6757Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6758{
6759 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6760 return Qt::compareThreeWay(lhs: res, rhs: 0);
6761}
6762
6763bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6764{
6765 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6766}
6767
6768Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6769{
6770 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6771 return Qt::compareThreeWay(lhs: res, rhs: 0);
6772}
6773
6774bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6775{
6776 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6777}
6778
6779Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6780{
6781 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6782 return Qt::compareThreeWay(lhs: res, rhs: 0);
6783}
6784
6785/*!
6786 \internal
6787 \since 6.8
6788*/
6789bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6790{
6791 return QtPrivate::equalStrings(lhs: QStringView(&lhs, 1), rhs: QUtf8StringView(rhs));
6792}
6793
6794int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6795{
6796 return QtPrivate::compareStrings(lhs: QStringView(&lhs, 1), rhs: QUtf8StringView(rhs));
6797}
6798
6799/*!
6800 \internal
6801 \since 6.8
6802*/
6803bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6804{
6805 Q_ASSERT(len >= 0);
6806 Q_ASSERT(data || len == 0);
6807 return QtPrivate::equalStrings(lhs: sv, rhs: QUtf8StringView(data, len));
6808}
6809
6810/*!
6811 \internal
6812 \since 6.8
6813*/
6814int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6815{
6816 Q_ASSERT(len >= 0);
6817 Q_ASSERT(data || len == 0);
6818 return QtPrivate::compareStrings(lhs: sv, rhs: QUtf8StringView(data, len));
6819}
6820
6821/*!
6822 \internal
6823 \since 6.8
6824*/
6825bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6826{
6827 // because qlatin1stringview.h can't include qutf8stringview.h
6828 Q_ASSERT(len >= 0);
6829 Q_ASSERT(s2 || len == 0);
6830 return QtPrivate::equalStrings(lhs: s1, rhs: QUtf8StringView(s2, len));
6831}
6832
6833/*!
6834 \internal
6835 \since 6.6
6836*/
6837int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6838{
6839 // because qlatin1stringview.h can't include qutf8stringview.h
6840 Q_ASSERT(len >= 0);
6841 Q_ASSERT(s2 || len == 0);
6842 return QtPrivate::compareStrings(lhs: s1, rhs: QUtf8StringView(s2, len));
6843}
6844
6845/*!
6846 \internal
6847 \since 4.5
6848*/
6849int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6850 Qt::CaseSensitivity cs) noexcept
6851{
6852 Q_ASSERT(length1 >= 0);
6853 Q_ASSERT(data1 || length1 == 0);
6854 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: s2, cs);
6855}
6856
6857/*!
6858 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6859
6860 Compares \a s1 with \a s2 and returns an integer less than, equal
6861 to, or greater than zero if \a s1 is less than, equal to, or
6862 greater than \a s2.
6863
6864 The comparison is performed in a locale- and also
6865 platform-dependent manner. Use this function to present sorted
6866 lists of strings to the user.
6867
6868 \sa compare(), QLocale, {Comparing Strings}
6869*/
6870
6871/*!
6872 \fn int QString::localeAwareCompare(QStringView other) const
6873 \since 6.0
6874 \overload localeAwareCompare()
6875
6876 Compares this string with the \a other string and returns an
6877 integer less than, equal to, or greater than zero if this string
6878 is less than, equal to, or greater than the \a other string.
6879
6880 The comparison is performed in a locale- and also
6881 platform-dependent manner. Use this function to present sorted
6882 lists of strings to the user.
6883
6884 Same as \c {localeAwareCompare(*this, other)}.
6885
6886 \sa {Comparing Strings}
6887*/
6888
6889/*!
6890 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6891 \since 6.0
6892 \overload localeAwareCompare()
6893
6894 Compares \a s1 with \a s2 and returns an integer less than, equal
6895 to, or greater than zero if \a s1 is less than, equal to, or
6896 greater than \a s2.
6897
6898 The comparison is performed in a locale- and also
6899 platform-dependent manner. Use this function to present sorted
6900 lists of strings to the user.
6901
6902 \sa {Comparing Strings}
6903*/
6904
6905
// Fallback definitions of the result codes that localeAwareCompare_helper()
// checks after calling CompareStringEx(), used when nothing has defined
// CSTR_LESS_THAN yet.
#ifndef CSTR_LESS_THAN
#  define CSTR_LESS_THAN 1
#  define CSTR_EQUAL 2
#  define CSTR_GREATER_THAN 3
#endif
6911
6912/*!
6913 \overload localeAwareCompare()
6914
6915 Compares this string with the \a other string and returns an
6916 integer less than, equal to, or greater than zero if this string
6917 is less than, equal to, or greater than the \a other string.
6918
6919 The comparison is performed in a locale- and also
6920 platform-dependent manner. Use this function to present sorted
6921 lists of strings to the user.
6922
6923 Same as \c {localeAwareCompare(*this, other)}.
6924
6925 \sa {Comparing Strings}
6926*/
6927int QString::localeAwareCompare(const QString &other) const
6928{
6929 return localeAwareCompare_helper(data1: constData(), length1: size(), data2: other.constData(), length2: other.size());
6930}
6931
6932/*!
6933 \internal
6934 \since 4.5
6935*/
6936int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6937 const QChar *data2, qsizetype length2)
6938{
6939 Q_ASSERT(length1 >= 0);
6940 Q_ASSERT(data1 || length1 == 0);
6941 Q_ASSERT(length2 >= 0);
6942 Q_ASSERT(data2 || length2 == 0);
6943
6944 // do the right thing for null and empty
6945 if (length1 == 0 || length2 == 0)
6946 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: QStringView(data2, length2),
6947 cs: Qt::CaseSensitive);
6948
6949#if QT_CONFIG(icu)
6950 return QCollator::defaultCompare(s1: QStringView(data1, length1), s2: QStringView(data2, length2));
6951#else
6952 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6953 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6954# if defined(Q_OS_WIN)
6955 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6956
6957 switch (res) {
6958 case CSTR_LESS_THAN:
6959 return -1;
6960 case CSTR_GREATER_THAN:
6961 return 1;
6962 default:
6963 return 0;
6964 }
6965# elif defined (Q_OS_DARWIN)
6966 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6967 // strings the same way as native applications do, and also respects
6968 // the "Order for sorted lists" setting in the International preferences
6969 // panel.
6970 const CFStringRef thisString =
6971 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6972 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6973 const CFStringRef otherString =
6974 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6975 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6976
6977 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6978 CFRelease(thisString);
6979 CFRelease(otherString);
6980 return result;
6981# elif defined(Q_OS_UNIX)
6982 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6983 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6984# else
6985# error "This case shouldn't happen"
6986 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6987# endif
6988#endif // !QT_CONFIG(icu)
6989}
6990
6991
6992/*!
6993 \fn const QChar *QString::unicode() const
6994
6995 Returns a Unicode representation of the string.
6996 The result remains valid until the string is modified.
6997
6998 \note The returned string may not be '\\0'-terminated.
6999 Use size() to determine the length of the array.
7000
7001 \sa utf16(), fromRawData()
7002*/
7003
7004/*!
7005 \fn const ushort *QString::utf16() const
7006
    Returns the QString as a '\\0'-terminated array of unsigned
7008 shorts. The result remains valid until the string is modified.
7009
7010 The returned string is in host byte order.
7011
7012 \sa unicode()
7013*/
7014
7015const ushort *QString::utf16() const
7016{
7017 if (!d->isMutable()) {
7018 // ensure '\0'-termination for ::fromRawData strings
7019 const_cast<QString*>(this)->reallocData(alloc: d.size, option: QArrayData::KeepSize);
7020 }
7021 return reinterpret_cast<const ushort *>(d.data());
7022}
7023
7024/*!
7025 Returns a string of size \a width that contains this string
7026 padded by the \a fill character.
7027
7028 If \a truncate is \c false and the size() of the string is more than
7029 \a width, then the returned string is a copy of the string.
7030
7031 \snippet qstring/main.cpp 32
7032
7033 If \a truncate is \c true and the size() of the string is more than
7034 \a width, then any characters in a copy of the string after
7035 position \a width are removed, and the copy is returned.
7036
7037 \snippet qstring/main.cpp 33
7038
7039 \sa rightJustified()
7040*/
7041
7042QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7043{
7044 QString result;
7045 qsizetype len = size();
7046 qsizetype padlen = width - len;
7047 if (padlen > 0) {
7048 result.resize(size: len+padlen);
7049 if (len)
7050 memcpy(dest: result.d.data(), src: d.data(), n: sizeof(QChar)*len);
7051 QChar *uc = (QChar*)result.d.data() + len;
7052 while (padlen--)
7053 * uc++ = fill;
7054 } else {
7055 if (truncate)
7056 result = left(n: width);
7057 else
7058 result = *this;
7059 }
7060 return result;
7061}
7062
7063/*!
    Returns a string of size \a width that contains the \a fill
7065 character followed by the string. For example:
7066
7067 \snippet qstring/main.cpp 49
7068
7069 If \a truncate is \c false and the size() of the string is more than
7070 \a width, then the returned string is a copy of the string.
7071
    If \a truncate is \c true and the size() of the string is more than
7073 \a width, then the resulting string is truncated at position \a
7074 width.
7075
7076 \snippet qstring/main.cpp 50
7077
7078 \sa leftJustified()
7079*/
7080
7081QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7082{
7083 QString result;
7084 qsizetype len = size();
7085 qsizetype padlen = width - len;
7086 if (padlen > 0) {
7087 result.resize(size: len+padlen);
7088 QChar *uc = (QChar*)result.d.data();
7089 while (padlen--)
7090 * uc++ = fill;
7091 if (len)
7092 memcpy(dest: static_cast<void *>(uc), src: static_cast<const void *>(d.data()), n: sizeof(QChar)*len);
7093 } else {
7094 if (truncate)
7095 result = left(n: width);
7096 else
7097 result = *this;
7098 }
7099 return result;
7100}
7101
7102/*!
7103 \fn QString QString::toLower() const
7104
7105 Returns a lowercase copy of the string.
7106
7107 \snippet qstring/main.cpp 75
7108
7109 The case conversion will always happen in the 'C' locale. For
    locale-dependent case folding use QLocale::toLower().
7111
7112 \sa toUpper(), QLocale::toLower()
7113*/
7114
7115namespace QUnicodeTables {
7116/*
7117 \internal
7118 Converts the \a str string starting from the position pointed to by the \a
7119 it iterator, using the Unicode case traits \c Traits, and returns the
7120 result. The input string must not be empty (the convertCase function below
7121 guarantees that).
7122
7123 The string type \c{T} is also a template and is either \c{const QString} or
7124 \c{QString}. This function can do both copy-conversion and in-place
7125 conversion depending on the state of the \a str parameter:
7126 \list
7127 \li \c{T} is \c{const QString}: copy-convert
7128 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7129 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7130 \endlist
7131
7132 In copy-convert mode, the local variable \c{s} is detached from the input
7133 \a str. In the in-place convert mode, \a str is in moved-from state and
7134 \c{s} contains the only copy of the string, without reallocation (thus,
7135 \a it is still valid).
7136
7137 There is one pathological case left: when the in-place conversion needs to
7138 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7139 it pointer.
7140 */
7141template <typename T>
7142Q_NEVER_INLINE
7143static QString detachAndConvertCase(T &str, QStringIterator it, QUnicodeTables::Case which)
7144{
7145 Q_ASSERT(!str.isEmpty());
7146 QString s = std::move(str); // will copy if T is const QString
7147 QChar *pp = s.begin() + it.index(); // will detach if necessary
7148
7149 do {
7150 const auto folded = fullConvertCase(uc: it.next(), which);
7151 if (Q_UNLIKELY(folded.size() > 1)) {
7152 if (folded.chars[0] == *pp && folded.size() == 2) {
7153 // special case: only second actually changed (e.g. surrogate pairs),
7154 // avoid slow case
7155 ++pp;
7156 *pp++ = folded.chars[1];
7157 } else {
7158 // slow path: the string is growing
7159 qsizetype inpos = it.index() - 1;
7160 qsizetype outpos = pp - s.constBegin();
7161
7162 s.replace(pos: outpos, len: 1, after: reinterpret_cast<const QChar *>(folded.data()), alen: folded.size());
7163 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7164
7165 // Adjust the input iterator if we are performing an in-place conversion
7166 if constexpr (!std::is_const<T>::value)
7167 it = QStringIterator(s.constBegin(), inpos + folded.size(), s.constEnd());
7168 }
7169 } else {
7170 *pp++ = folded.chars[0];
7171 }
7172 } while (it.hasNext());
7173
7174 return s;
7175}
7176
7177template <typename T>
7178static QString convertCase(T &str, QUnicodeTables::Case which)
7179{
7180 const QChar *p = str.constBegin();
7181 const QChar *e = p + str.size();
7182
7183 // this avoids out of bounds check in the loop
7184 while (e != p && e[-1].isHighSurrogate())
7185 --e;
7186
7187 QStringIterator it(p, e);
7188 while (it.hasNext()) {
7189 const char32_t uc = it.next();
7190 if (qGetProp(ucs4: uc)->cases[which].diff) {
7191 it.recede();
7192 return detachAndConvertCase(str, it, which);
7193 }
7194 }
7195 return std::move(str);
7196}
7197} // namespace QUnicodeTables
7198
7199QString QString::toLower_helper(const QString &str)
7200{
7201 return QUnicodeTables::convertCase(str, which: QUnicodeTables::LowerCase);
7202}
7203
7204QString QString::toLower_helper(QString &str)
7205{
7206 return QUnicodeTables::convertCase(str, which: QUnicodeTables::LowerCase);
7207}
7208
7209/*!
7210 \fn QString QString::toCaseFolded() const
7211
7212 Returns the case folded equivalent of the string. For most Unicode
7213 characters this is the same as toLower().
7214*/
7215
7216QString QString::toCaseFolded_helper(const QString &str)
7217{
7218 return QUnicodeTables::convertCase(str, which: QUnicodeTables::CaseFold);
7219}
7220
7221QString QString::toCaseFolded_helper(QString &str)
7222{
7223 return QUnicodeTables::convertCase(str, which: QUnicodeTables::CaseFold);
7224}
7225
7226/*!
7227 \fn QString QString::toUpper() const
7228
7229 Returns an uppercase copy of the string.
7230
7231 \snippet qstring/main.cpp 81
7232
7233 The case conversion will always happen in the 'C' locale. For
7234 locale-dependent case folding use QLocale::toUpper().
7235
7236 \note In some cases the uppercase form of a string may be longer than the
7237 original.
7238
    \sa toLower(), QLocale::toUpper()
7240*/
7241
7242QString QString::toUpper_helper(const QString &str)
7243{
7244 return QUnicodeTables::convertCase(str, which: QUnicodeTables::UpperCase);
7245}
7246
7247QString QString::toUpper_helper(QString &str)
7248{
7249 return QUnicodeTables::convertCase(str, which: QUnicodeTables::UpperCase);
7250}
7251
7252/*!
7253 \since 5.5
7254
7255 Safely builds a formatted string from the format string \a cformat
7256 and an arbitrary list of arguments.
7257
7258 The format string supports the conversion specifiers, length modifiers,
7259 and flags provided by printf() in the standard C++ library. The \a cformat
7260 string and \c{%s} arguments must be UTF-8 encoded.
7261
7262 \note The \c{%lc} escape sequence expects a unicode character of type
7263 \c char16_t, or \c ushort (as returned by QChar::unicode()).
7264 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7265 of unicode characters of type \c char16_t, or ushort (as returned by
7266 QString::utf16()). This is at odds with the printf() in the standard C++
7267 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7268 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7269 where the size of \c {wchar_t} is not 16 bits.
7270
7271 \warning We do not recommend using QString::asprintf() in new Qt
7272 code. Instead, consider using QTextStream or arg(), both of
7273 which support Unicode strings seamlessly and are type-safe.
7274 Here is an example that uses QTextStream:
7275
7276 \snippet qstring/main.cpp 64
7277
7278 For \l {QObject::tr()}{translations}, especially if the strings
    contain more than one escape sequence, you should consider using
7280 the arg() function instead. This allows the order of the
7281 replacements to be controlled by the translator.
7282
7283 \sa arg()
7284*/
7285
7286QString QString::asprintf(const char *cformat, ...)
7287{
7288 va_list ap;
7289 va_start(ap, cformat);
7290 const QString s = vasprintf(format: cformat, ap);
7291 va_end(ap);
7292 return s;
7293}
7294
7295static void append_utf8(QString &qs, const char *cs, qsizetype len)
7296{
7297 const qsizetype oldSize = qs.size();
7298 qs.resize(size: oldSize + len);
7299 const QChar *newEnd = QUtf8::convertToUnicode(buffer: qs.data() + oldSize, in: QByteArrayView(cs, len));
7300 qs.resize(size: newEnd - qs.constData());
7301}
7302
7303static uint parse_flag_characters(const char * &c) noexcept
7304{
7305 uint flags = QLocaleData::ZeroPadExponent;
7306 while (true) {
7307 switch (*c) {
7308 case '#':
7309 flags |= QLocaleData::ShowBase | QLocaleData::AddTrailingZeroes
7310 | QLocaleData::ForcePoint;
7311 break;
7312 case '0': flags |= QLocaleData::ZeroPadded; break;
7313 case '-': flags |= QLocaleData::LeftAdjusted; break;
7314 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7315 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7316 case '\'': flags |= QLocaleData::GroupDigits; break;
7317 default: return flags;
7318 }
7319 ++c;
7320 }
7321}
7322
7323static int parse_field_width(const char *&c, qsizetype size)
7324{
7325 Q_ASSERT(isAsciiDigit(*c));
7326 const char *const stop = c + size;
7327
7328 // can't be negative - started with a digit
7329 // contains at least one digit
7330 auto [result, used] = qstrntoull(nptr: c, size, base: 10);
7331 c += used;
7332 if (used <= 0)
7333 return false;
7334 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7335 while (c < stop && isAsciiDigit(c: *c))
7336 ++c;
7337 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7338}
7339
// Length modifiers of a printf-style conversion, as returned by
// parse_length_modifier().
enum LengthMod {
    lm_none, // no modifier present
    lm_hh,   // "hh"
    lm_h,    // "h"
    lm_l,    // "l"
    lm_ll,   // "ll"
    lm_L,    // "L"
    lm_j,    // "j"
    lm_z,    // "z" or "Z"
    lm_t     // "t"
};
7341
static inline bool can_consume(const char * &c, char ch) noexcept
{
    // Advance past *c only when it is the expected character.
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7350
7351static LengthMod parse_length_modifier(const char * &c) noexcept
7352{
7353 switch (*c++) {
7354 case 'h': return can_consume(c, ch: 'h') ? lm_hh : lm_h;
7355 case 'l': return can_consume(c, ch: 'l') ? lm_ll : lm_l;
7356 case 'L': return lm_L;
7357 case 'j': return lm_j;
7358 case 'z':
7359 case 'Z': return lm_z;
7360 case 't': return lm_t;
7361 }
7362 --c; // don't consume *c - it wasn't a flag
7363 return lm_none;
7364}
7365
7366/*!
7367 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7368 \since 5.5
7369
7370 Equivalent method to asprintf(), but takes a va_list \a ap
    instead of a list of variable arguments. See the asprintf()
7372 documentation for an explanation of \a cformat.
7373
    This method does not call the va_end macro; the caller
    is responsible for calling va_end on \a ap.
7376
7377 \sa asprintf()
7378*/
7379
7380QString QString::vasprintf(const char *cformat, va_list ap)
7381{
7382 if (!cformat || !*cformat) {
7383 // Qt 1.x compat
7384 return fromLatin1(ba: "");
7385 }
7386
7387 // Parse cformat
7388
7389 QString result;
7390 const char *c = cformat;
7391 const char *formatEnd = cformat + qstrlen(str: cformat);
7392 for (;;) {
7393 // Copy non-escape chars to result
7394 const char *cb = c;
7395 while (*c != '\0' && *c != '%')
7396 c++;
7397 append_utf8(qs&: result, cs: cb, len: qsizetype(c - cb));
7398
7399 if (*c == '\0')
7400 break;
7401
7402 // Found '%'
7403 const char *escape_start = c;
7404 ++c;
7405
7406 if (*c == '\0') {
7407 result.append(ch: u'%'); // a % at the end of the string - treat as non-escape text
7408 break;
7409 }
7410 if (*c == '%') {
7411 result.append(ch: u'%'); // %%
7412 ++c;
7413 continue;
7414 }
7415
7416 uint flags = parse_flag_characters(c);
7417
7418 if (*c == '\0') {
7419 result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7420 break;
7421 }
7422
7423 // Parse field width
7424 int width = -1; // -1 means unspecified
7425 if (isAsciiDigit(c: *c)) {
7426 width = parse_field_width(c, size: formatEnd - c);
7427 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7428 width = va_arg(ap, int);
7429 if (width < 0)
7430 width = -1; // treat all negative numbers as unspecified
7431 ++c;
7432 }
7433
7434 if (*c == '\0') {
7435 result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7436 break;
7437 }
7438
7439 // Parse precision
7440 int precision = -1; // -1 means unspecified
7441 if (*c == '.') {
7442 ++c;
7443 precision = 0;
7444 if (isAsciiDigit(c: *c)) {
7445 precision = parse_field_width(c, size: formatEnd - c);
7446 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7447 precision = va_arg(ap, int);
7448 if (precision < 0)
7449 precision = -1; // treat all negative numbers as unspecified
7450 ++c;
7451 }
7452 }
7453
7454 if (*c == '\0') {
7455 result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7456 break;
7457 }
7458
7459 const LengthMod length_mod = parse_length_modifier(c);
7460
7461 if (*c == '\0') {
7462 result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7463 break;
7464 }
7465
7466 // Parse the conversion specifier and do the conversion
7467 QString subst;
7468 switch (*c) {
7469 case 'd':
7470 case 'i': {
7471 qint64 i;
7472 switch (length_mod) {
7473 case lm_none: i = va_arg(ap, int); break;
7474 case lm_hh: i = va_arg(ap, int); break;
7475 case lm_h: i = va_arg(ap, int); break;
7476 case lm_l: i = va_arg(ap, long int); break;
7477 case lm_ll: i = va_arg(ap, qint64); break;
7478 case lm_j: i = va_arg(ap, long int); break;
7479
7480 /* ptrdiff_t actually, but it should be the same for us */
7481 case lm_z: i = va_arg(ap, qsizetype); break;
7482 case lm_t: i = va_arg(ap, qsizetype); break;
7483 default: i = 0; break;
7484 }
7485 subst = QLocaleData::c()->longLongToString(l: i, precision, base: 10, width, flags);
7486 ++c;
7487 break;
7488 }
7489 case 'o':
7490 case 'u':
7491 case 'x':
7492 case 'X': {
7493 quint64 u;
7494 switch (length_mod) {
7495 case lm_none: u = va_arg(ap, uint); break;
7496 case lm_hh: u = va_arg(ap, uint); break;
7497 case lm_h: u = va_arg(ap, uint); break;
7498 case lm_l: u = va_arg(ap, ulong); break;
7499 case lm_ll: u = va_arg(ap, quint64); break;
7500 case lm_t: u = va_arg(ap, size_t); break;
7501 case lm_z: u = va_arg(ap, size_t); break;
7502 default: u = 0; break;
7503 }
7504
7505 if (isAsciiUpper(c: *c))
7506 flags |= QLocaleData::CapitalEorX;
7507
7508 int base = 10;
7509 switch (QtMiscUtils::toAsciiLower(ch: *c)) {
7510 case 'o':
7511 base = 8; break;
7512 case 'u':
7513 base = 10; break;
7514 case 'x':
7515 base = 16; break;
7516 default: break;
7517 }
7518 subst = QLocaleData::c()->unsLongLongToString(l: u, precision, base, width, flags);
7519 ++c;
7520 break;
7521 }
7522 case 'E':
7523 case 'e':
7524 case 'F':
7525 case 'f':
7526 case 'G':
7527 case 'g':
7528 case 'A':
7529 case 'a': {
7530 double d;
7531 if (length_mod == lm_L)
7532 d = va_arg(ap, long double); // not supported - converted to a double
7533 else
7534 d = va_arg(ap, double);
7535
7536 if (isAsciiUpper(c: *c))
7537 flags |= QLocaleData::CapitalEorX;
7538
7539 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7540 switch (QtMiscUtils::toAsciiLower(ch: *c)) {
7541 case 'e': form = QLocaleData::DFExponent; break;
7542 case 'a': // not supported - decimal form used instead
7543 case 'f': form = QLocaleData::DFDecimal; break;
7544 case 'g': form = QLocaleData::DFSignificantDigits; break;
7545 default: break;
7546 }
7547 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7548 ++c;
7549 break;
7550 }
7551 case 'c': {
7552 if (length_mod == lm_l)
7553 subst = QChar::fromUcs2(va_arg(ap, int));
7554 else
7555 subst = QLatin1Char((uchar) va_arg(ap, int));
7556 ++c;
7557 break;
7558 }
7559 case 's': {
7560 if (length_mod == lm_l) {
7561 const ushort *buff = va_arg(ap, const ushort*);
7562 const ushort *ch = buff;
7563 while (precision != 0 && *ch != 0) {
7564 ++ch;
7565 --precision;
7566 }
7567 subst.setUtf16(autf16: buff, asize: ch - buff);
7568 } else if (precision == -1) {
7569 subst = QString::fromUtf8(va_arg(ap, const char*));
7570 } else {
7571 const char *buff = va_arg(ap, const char*);
7572 subst = QString::fromUtf8(utf8: buff, size: qstrnlen(str: buff, maxlen: precision));
7573 }
7574 ++c;
7575 break;
7576 }
7577 case 'p': {
7578 void *arg = va_arg(ap, void*);
7579 const quint64 i = reinterpret_cast<quintptr>(arg);
7580 flags |= QLocaleData::ShowBase;
7581 subst = QLocaleData::c()->unsLongLongToString(l: i, precision, base: 16, width, flags);
7582 ++c;
7583 break;
7584 }
7585 case 'n':
7586 switch (length_mod) {
7587 case lm_hh: {
7588 signed char *n = va_arg(ap, signed char*);
7589 *n = result.size();
7590 break;
7591 }
7592 case lm_h: {
7593 short int *n = va_arg(ap, short int*);
7594 *n = result.size();
7595 break;
7596 }
7597 case lm_l: {
7598 long int *n = va_arg(ap, long int*);
7599 *n = result.size();
7600 break;
7601 }
7602 case lm_ll: {
7603 qint64 *n = va_arg(ap, qint64*);
7604 *n = result.size();
7605 break;
7606 }
7607 default: {
7608 int *n = va_arg(ap, int*);
7609 *n = int(result.size());
7610 break;
7611 }
7612 }
7613 ++c;
7614 break;
7615
7616 default: // bad escape, treat as non-escape text
7617 for (const char *cc = escape_start; cc != c; ++cc)
7618 result.append(ch: QLatin1Char(*cc));
7619 continue;
7620 }
7621
7622 if (flags & QLocaleData::LeftAdjusted)
7623 result.append(str: subst.leftJustified(width));
7624 else
7625 result.append(str: subst.rightJustified(width));
7626 }
7627
7628 return result;
7629}
7630
7631/*!
7632 \fn QString::toLongLong(bool *ok, int base) const
7633
7634 Returns the string converted to a \c{long long} using base \a
7635 base, which is 10 by default and must be between 2 and 36, or 0.
7636 Returns 0 if the conversion fails.
7637
7638 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7639 to \c false, and success by setting *\a{ok} to \c true.
7640
7641 If \a base is 0, the C language convention is used: if the string begins
7642 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7643 2 is used; otherwise, if the string begins with "0", base 8 is used;
7644 otherwise, base 10 is used.
7645
7646 The string conversion will always happen in the 'C' locale. For
7647 locale-dependent conversion use QLocale::toLongLong()
7648
7649 Example:
7650
7651 \snippet qstring/main.cpp 74
7652
7653 This function ignores leading and trailing whitespace.
7654
7655 \note Support for the "0b" prefix was added in Qt 6.4.
7656
7657 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7658*/
7659
7660template <typename Int>
7661static Int toIntegral(QStringView string, bool *ok, int base)
7662{
7663#if defined(QT_CHECK_RANGE)
7664 if (base != 0 && (base < 2 || base > 36)) {
7665 qWarning("QString::toIntegral: Invalid base (%d)", base);
7666 base = 10;
7667 }
7668#endif
7669
7670 QVarLengthArray<uchar> latin1(string.size());
7671 qt_to_latin1(dst: latin1.data(), src: string.utf16(), length: string.size());
7672 QSimpleParsedNumber<Int> r;
7673 if constexpr (std::is_signed_v<Int>)
7674 r = QLocaleData::bytearrayToLongLong(num: latin1, base);
7675 else
7676 r = QLocaleData::bytearrayToUnsLongLong(num: latin1, base);
7677 if (ok)
7678 *ok = r.ok();
7679 return r.result;
7680}
7681
7682qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7683{
7684 return toIntegral<qlonglong>(string, ok, base);
7685}
7686
7687/*!
7688 \fn QString::toULongLong(bool *ok, int base) const
7689
7690 Returns the string converted to an \c{unsigned long long} using base \a
7691 base, which is 10 by default and must be between 2 and 36, or 0.
7692 Returns 0 if the conversion fails.
7693
7694 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7695 to \c false, and success by setting *\a{ok} to \c true.
7696
7697 If \a base is 0, the C language convention is used: if the string begins
7698 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7699 2 is used; otherwise, if the string begins with "0", base 8 is used;
7700 otherwise, base 10 is used.
7701
7702 The string conversion will always happen in the 'C' locale. For
7703 locale-dependent conversion use QLocale::toULongLong()
7704
7705 Example:
7706
7707 \snippet qstring/main.cpp 79
7708
7709 This function ignores leading and trailing whitespace.
7710
7711 \note Support for the "0b" prefix was added in Qt 6.4.
7712
7713 \sa number(), toLongLong(), QLocale::toULongLong()
7714*/
7715
7716qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7717{
7718 return toIntegral<qulonglong>(string, ok, base);
7719}
7720
7721/*!
7722 \fn long QString::toLong(bool *ok, int base) const
7723
7724 Returns the string converted to a \c long using base \a
7725 base, which is 10 by default and must be between 2 and 36, or 0.
7726 Returns 0 if the conversion fails.
7727
7728 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7729 to \c false, and success by setting *\a{ok} to \c true.
7730
7731 If \a base is 0, the C language convention is used: if the string begins
7732 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7733 2 is used; otherwise, if the string begins with "0", base 8 is used;
7734 otherwise, base 10 is used.
7735
7736 The string conversion will always happen in the 'C' locale. For
7737 locale-dependent conversion use QLocale::toLongLong()
7738
7739 Example:
7740
7741 \snippet qstring/main.cpp 73
7742
7743 This function ignores leading and trailing whitespace.
7744
7745 \note Support for the "0b" prefix was added in Qt 6.4.
7746
7747 \sa number(), toULong(), toInt(), QLocale::toInt()
7748*/
7749
7750/*!
7751 \fn ulong QString::toULong(bool *ok, int base) const
7752
7753 Returns the string converted to an \c{unsigned long} using base \a
7754 base, which is 10 by default and must be between 2 and 36, or 0.
7755 Returns 0 if the conversion fails.
7756
7757 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7758 to \c false, and success by setting *\a{ok} to \c true.
7759
7760 If \a base is 0, the C language convention is used: if the string begins
7761 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7762 2 is used; otherwise, if the string begins with "0", base 8 is used;
7763 otherwise, base 10 is used.
7764
7765 The string conversion will always happen in the 'C' locale. For
7766 locale-dependent conversion use QLocale::toULongLong()
7767
7768 Example:
7769
7770 \snippet qstring/main.cpp 78
7771
7772 This function ignores leading and trailing whitespace.
7773
7774 \note Support for the "0b" prefix was added in Qt 6.4.
7775
7776 \sa number(), QLocale::toUInt()
7777*/
7778
7779/*!
7780 \fn int QString::toInt(bool *ok, int base) const
7781 Returns the string converted to an \c int using base \a
7782 base, which is 10 by default and must be between 2 and 36, or 0.
7783 Returns 0 if the conversion fails.
7784
7785 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7786 to \c false, and success by setting *\a{ok} to \c true.
7787
7788 If \a base is 0, the C language convention is used: if the string begins
7789 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7790 2 is used; otherwise, if the string begins with "0", base 8 is used;
7791 otherwise, base 10 is used.
7792
7793 The string conversion will always happen in the 'C' locale. For
7794 locale-dependent conversion use QLocale::toInt()
7795
7796 Example:
7797
7798 \snippet qstring/main.cpp 72
7799
7800 This function ignores leading and trailing whitespace.
7801
7802 \note Support for the "0b" prefix was added in Qt 6.4.
7803
7804 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7805*/
7806
7807/*!
7808 \fn uint QString::toUInt(bool *ok, int base) const
7809 Returns the string converted to an \c{unsigned int} using base \a
7810 base, which is 10 by default and must be between 2 and 36, or 0.
7811 Returns 0 if the conversion fails.
7812
7813 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7814 to \c false, and success by setting *\a{ok} to \c true.
7815
7816 If \a base is 0, the C language convention is used: if the string begins
7817 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7818 2 is used; otherwise, if the string begins with "0", base 8 is used;
7819 otherwise, base 10 is used.
7820
7821 The string conversion will always happen in the 'C' locale. For
7822 locale-dependent conversion use QLocale::toUInt()
7823
7824 Example:
7825
7826 \snippet qstring/main.cpp 77
7827
7828 This function ignores leading and trailing whitespace.
7829
7830 \note Support for the "0b" prefix was added in Qt 6.4.
7831
7832 \sa number(), toInt(), QLocale::toUInt()
7833*/
7834
7835/*!
7836 \fn short QString::toShort(bool *ok, int base) const
7837
7838 Returns the string converted to a \c short using base \a
7839 base, which is 10 by default and must be between 2 and 36, or 0.
7840 Returns 0 if the conversion fails.
7841
7842 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7843 to \c false, and success by setting *\a{ok} to \c true.
7844
7845 If \a base is 0, the C language convention is used: if the string begins
7846 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7847 2 is used; otherwise, if the string begins with "0", base 8 is used;
7848 otherwise, base 10 is used.
7849
7850 The string conversion will always happen in the 'C' locale. For
7851 locale-dependent conversion use QLocale::toShort()
7852
7853 Example:
7854
7855 \snippet qstring/main.cpp 76
7856
7857 This function ignores leading and trailing whitespace.
7858
7859 \note Support for the "0b" prefix was added in Qt 6.4.
7860
7861 \sa number(), toUShort(), toInt(), QLocale::toShort()
7862*/
7863
7864/*!
7865 \fn ushort QString::toUShort(bool *ok, int base) const
7866
7867 Returns the string converted to an \c{unsigned short} using base \a
7868 base, which is 10 by default and must be between 2 and 36, or 0.
7869 Returns 0 if the conversion fails.
7870
7871 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7872 to \c false, and success by setting *\a{ok} to \c true.
7873
7874 If \a base is 0, the C language convention is used: if the string begins
7875 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7876 2 is used; otherwise, if the string begins with "0", base 8 is used;
7877 otherwise, base 10 is used.
7878
7879 The string conversion will always happen in the 'C' locale. For
7880 locale-dependent conversion use QLocale::toUShort()
7881
7882 Example:
7883
7884 \snippet qstring/main.cpp 80
7885
7886 This function ignores leading and trailing whitespace.
7887
7888 \note Support for the "0b" prefix was added in Qt 6.4.
7889
7890 \sa number(), toShort(), QLocale::toUShort()
7891*/
7892
7893/*!
7894 Returns the string converted to a \c double value.
7895
7896 Returns an infinity if the conversion overflows or 0.0 if the
7897 conversion fails for other reasons (e.g. underflow).
7898
7899 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7900 to \c false, and success by setting *\a{ok} to \c true.
7901
7902 \snippet qstring/main.cpp 66
7903
7904 \warning The QString content may only contain valid numerical characters
7905 which includes the plus/minus sign, the character e used in scientific
7906 notation, and the decimal point. Including the unit or additional characters
7907 leads to a conversion error.
7908
7909 \snippet qstring/main.cpp 67
7910
7911 The string conversion will always happen in the 'C' locale. For
7912 locale-dependent conversion use QLocale::toDouble()
7913
7914 \snippet qstring/main.cpp 68
7915
7916 For historical reasons, this function does not handle
7917 thousands group separators. If you need to convert such numbers,
7918 use QLocale::toDouble().
7919
7920 \snippet qstring/main.cpp 69
7921
7922 This function ignores leading and trailing whitespace.
7923
7924 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7925*/
7926
7927double QString::toDouble(bool *ok) const
7928{
7929 return QStringView(*this).toDouble(ok);
7930}
7931
7932double QStringView::toDouble(bool *ok) const
7933{
7934 QStringView string = qt_trimmed(s: *this);
7935 QVarLengthArray<uchar> latin1(string.size());
7936 qt_to_latin1(dst: latin1.data(), src: string.utf16(), length: string.size());
7937 auto r = qt_asciiToDouble(num: reinterpret_cast<const char *>(latin1.data()), numLen: string.size());
7938 if (ok != nullptr)
7939 *ok = r.ok();
7940 return r.result;
7941}
7942
7943/*!
7944 Returns the string converted to a \c float value.
7945
7946 Returns an infinity if the conversion overflows or 0.0 if the
7947 conversion fails for other reasons (e.g. underflow).
7948
7949 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7950 to \c false, and success by setting *\a{ok} to \c true.
7951
7952 \warning The QString content may only contain valid numerical characters
7953 which includes the plus/minus sign, the character e used in scientific
7954 notation, and the decimal point. Including the unit or additional characters
7955 leads to a conversion error.
7956
7957 The string conversion will always happen in the 'C' locale. For
7958 locale-dependent conversion use QLocale::toFloat()
7959
7960 For historical reasons, this function does not handle
7961 thousands group separators. If you need to convert such numbers,
7962 use QLocale::toFloat().
7963
7964 Example:
7965
7966 \snippet qstring/main.cpp 71
7967
7968 This function ignores leading and trailing whitespace.
7969
7970 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
7971*/
7972
7973float QString::toFloat(bool *ok) const
7974{
7975 return QLocaleData::convertDoubleToFloat(d: toDouble(ok), ok);
7976}
7977
7978float QStringView::toFloat(bool *ok) const
7979{
7980 return QLocaleData::convertDoubleToFloat(d: toDouble(ok), ok);
7981}
7982
7983/*! \fn QString &QString::setNum(int n, int base)
7984
7985 Sets the string to the printed value of \a n in the specified \a
7986 base, and returns a reference to the string.
7987
7988 The base is 10 by default and must be between 2 and 36.
7989
7990 \snippet qstring/main.cpp 56
7991
7992 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7993 To get a localized string representation of a number, use
7994 QLocale::toString() with the appropriate locale.
7995
7996 \sa number()
7997*/
7998
7999/*! \fn QString &QString::setNum(uint n, int base)
8000
8001 \overload
8002*/
8003
8004/*! \fn QString &QString::setNum(long n, int base)
8005
8006 \overload
8007*/
8008
8009/*! \fn QString &QString::setNum(ulong n, int base)
8010
8011 \overload
8012*/
8013
8014/*!
8015 \overload
8016*/
8017QString &QString::setNum(qlonglong n, int base)
8018{
8019 return *this = number(n, base);
8020}
8021
8022/*!
8023 \overload
8024*/
8025QString &QString::setNum(qulonglong n, int base)
8026{
8027 return *this = number(n, base);
8028}
8029
8030/*! \fn QString &QString::setNum(short n, int base)
8031
8032 \overload
8033*/
8034
8035/*! \fn QString &QString::setNum(ushort n, int base)
8036
8037 \overload
8038*/
8039
8040/*!
8041 \overload
8042
8043 Sets the string to the printed value of \a n, formatted according to the
8044 given \a format and \a precision, and returns a reference to the string.
8045
8046 \sa number(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8047*/
8048
8049QString &QString::setNum(double n, char format, int precision)
8050{
8051 return *this = number(n, format, precision);
8052}
8053
8054/*!
8055 \fn QString &QString::setNum(float n, char format, int precision)
8056 \overload
8057
8058 Sets the string to the printed value of \a n, formatted according
8059 to the given \a format and \a precision, and returns a reference
8060 to the string.
8061
8062 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8063 To get a localized string representation of a number, use
8064 QLocale::toString() with the appropriate locale.
8065
8066 \sa number()
8067*/
8068
8069
8070/*!
8071 \fn QString QString::number(long n, int base)
8072
8073 Returns a string equivalent of the number \a n according to the
8074 specified \a base.
8075
8076 The base is 10 by default and must be between 2
8077 and 36. For bases other than 10, \a n is treated as an
8078 unsigned integer.
8079
8080 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8081 To get a localized string representation of a number, use
8082 QLocale::toString() with the appropriate locale.
8083
8084 \snippet qstring/main.cpp 35
8085
8086 \sa setNum()
8087*/
8088
8089QString QString::number(long n, int base)
8090{
8091 return number(qlonglong(n), base);
8092}
8093
8094/*!
8095 \fn QString QString::number(ulong n, int base)
8096
8097 \overload
8098*/
8099QString QString::number(ulong n, int base)
8100{
8101 return number(qulonglong(n), base);
8102}
8103
8104/*!
8105 \overload
8106*/
8107QString QString::number(int n, int base)
8108{
8109 return number(qlonglong(n), base);
8110}
8111
8112/*!
8113 \overload
8114*/
8115QString QString::number(uint n, int base)
8116{
8117 return number(qulonglong(n), base);
8118}
8119
8120/*!
8121 \overload
8122*/
8123QString QString::number(qlonglong n, int base)
8124{
8125#if defined(QT_CHECK_RANGE)
8126 if (base < 2 || base > 36) {
8127 qWarning("QString::setNum: Invalid base (%d)", base);
8128 base = 10;
8129 }
8130#endif
8131 bool negative = n < 0;
8132 /*
8133 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8134 taking an absolute value has to take a slight detour.
8135 */
8136 return qulltoBasicLatin(l: negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8137}
8138
8139/*!
8140 \overload
8141*/
8142QString QString::number(qulonglong n, int base)
8143{
8144#if defined(QT_CHECK_RANGE)
8145 if (base < 2 || base > 36) {
8146 qWarning("QString::setNum: Invalid base (%d)", base);
8147 base = 10;
8148 }
8149#endif
8150 return qulltoBasicLatin(l: n, base, negative: false);
8151}
8152
8153
8154/*!
8155 Returns a string representing the floating-point number \a n.
8156
8157 Returns a string that represents \a n, formatted according to the specified
8158 \a format and \a precision.
8159
8160 For formats with an exponent, the exponent will show its sign and have at
8161 least two digits, left-padding the exponent with zero if needed.
8162
8163 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8164*/
8165QString QString::number(double n, char format, int precision)
8166{
8167 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8168
8169 switch (QtMiscUtils::toAsciiLower(ch: format)) {
8170 case 'f':
8171 form = QLocaleData::DFDecimal;
8172 break;
8173 case 'e':
8174 form = QLocaleData::DFExponent;
8175 break;
8176 case 'g':
8177 form = QLocaleData::DFSignificantDigits;
8178 break;
8179 default:
8180#if defined(QT_CHECK_RANGE)
8181 qWarning("QString::setNum: Invalid format char '%c'", format);
8182#endif
8183 break;
8184 }
8185
8186 return qdtoBasicLatin(d: n, form, precision, uppercase: isAsciiUpper(c: format));
8187}
8188
8189namespace {
8190template<class ResultList, class StringSource>
8191static ResultList splitString(const StringSource &source, QStringView sep,
8192 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8193{
8194 ResultList list;
8195 typename StringSource::size_type start = 0;
8196 typename StringSource::size_type end;
8197 typename StringSource::size_type extra = 0;
8198 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8199 if (start != end || behavior == Qt::KeepEmptyParts)
8200 list.append(source.sliced(start, end - start));
8201 start = end + sep.size();
8202 extra = (sep.size() == 0 ? 1 : 0);
8203 }
8204 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8205 list.append(source.sliced(start));
8206 return list;
8207}
8208
8209} // namespace
8210
8211/*!
8212 Splits the string into substrings wherever \a sep occurs, and
8213 returns the list of those strings. If \a sep does not match
8214 anywhere in the string, split() returns a single-element list
8215 containing this string.
8216
8217 \a cs specifies whether \a sep should be matched case
8218 sensitively or case insensitively.
8219
8220 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8221 appear in the result. By default, empty entries are kept.
8222
8223 Example:
8224
8225 \snippet qstring/main.cpp 62
8226
8227 If \a sep is empty, split() returns an empty string, followed
8228 by each of the string's characters, followed by another empty string:
8229
8230 \snippet qstring/main.cpp 62-empty
8231
8232 To understand this behavior, recall that the empty string matches
8233 everywhere, so the above is qualitatively the same as:
8234
8235 \snippet qstring/main.cpp 62-slashes
8236
8237 \sa QStringList::join(), section()
8238
8239 \since 5.14
8240*/
8241QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8242{
8243 return splitString<QStringList>(source: *this, sep, behavior, cs);
8244}
8245
8246/*!
8247 \overload
8248 \since 5.14
8249*/
8250QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8251{
8252 return splitString<QStringList>(source: *this, sep: QStringView(&sep, 1), behavior, cs);
8253}
8254
8255/*!
8256 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8257 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8258
8259
8260 Splits the view into substring views wherever \a sep occurs, and
8261 returns the list of those string views.
8262
8263 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8264 the result.
8265
8266 \note All the returned views are valid as long as the data referenced by
8267 this string view is valid. Destroying the data will cause all views to
8268 become dangling.
8269
8270 \since 6.0
8271*/
8272QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8273{
8274 return splitString<QList<QStringView>>(source: QStringView(*this), sep, behavior, cs);
8275}
8276
8277QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8278{
8279 return split(sep: QStringView(&sep, 1), behavior, cs);
8280}
8281
8282#if QT_CONFIG(regularexpression)
8283namespace {
8284template<class ResultList, typename String, typename MatchingFunction>
8285static ResultList splitString(const String &source, const QRegularExpression &re,
8286 MatchingFunction matchingFunction,
8287 Qt::SplitBehavior behavior)
8288{
8289 ResultList list;
8290 if (!re.isValid()) {
8291 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::split");
8292 return list;
8293 }
8294
8295 qsizetype start = 0;
8296 qsizetype end = 0;
8297 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8298 while (iterator.hasNext()) {
8299 QRegularExpressionMatch match = iterator.next();
8300 end = match.capturedStart();
8301 if (start != end || behavior == Qt::KeepEmptyParts)
8302 list.append(source.sliced(start, end - start));
8303 start = match.capturedEnd();
8304 }
8305
8306 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8307 list.append(source.sliced(start));
8308
8309 return list;
8310}
8311} // namespace
8312
8313/*!
8314 \overload
8315 \since 5.14
8316
8317 Splits the string into substrings wherever the regular expression
8318 \a re matches, and returns the list of those strings. If \a re
8319 does not match anywhere in the string, split() returns a
8320 single-element list containing this string.
8321
8322 Here is an example where we extract the words in a sentence
8323 using one or more whitespace characters as the separator:
8324
8325 \snippet qstring/main.cpp 90
8326
8327 Here is a similar example, but this time we use any sequence of
8328 non-word characters as the separator:
8329
8330 \snippet qstring/main.cpp 91
8331
8332 Here is a third example where we use a zero-length assertion,
8333 \b{\\b} (word boundary), to split the string into an
8334 alternating sequence of non-word and word tokens:
8335
8336 \snippet qstring/main.cpp 92
8337
8338 \sa QStringList::join(), section()
8339*/
8340QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8341{
8342#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8343 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8344#else
8345 const auto matchingFunction = &QRegularExpression::globalMatch;
8346#endif
8347 return splitString<QStringList>(source: *this,
8348 re,
8349 matchingFunction,
8350 behavior);
8351}
8352
8353/*!
8354 \overload
8355 \since 6.0
8356
    Splits the view into substring views wherever the regular expression \a re
    matches, and returns the list of those views. If \a re does not match
    anywhere in the view, split() returns a single-element list containing
    this view.
8361
8362 \note The views in the returned list are sub-views of this view; as such,
8363 they reference the same data as it and only remain valid for as long as that
8364 data remains live.
8365*/
8366QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8367{
8368 return splitString<QList<QStringView>>(source: *this, re, matchingFunction: &QRegularExpression::globalMatchView, behavior);
8369}
8370
8371#endif // QT_CONFIG(regularexpression)
8372
8373/*!
8374 \enum QString::NormalizationForm
8375
8376 This enum describes the various normalized forms of Unicode text.
8377
8378 \value NormalizationForm_D Canonical Decomposition
8379 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8380 \value NormalizationForm_KD Compatibility Decomposition
8381 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8382
8383 \sa normalized(),
8384 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8385*/
8386
8387/*!
8388 \since 4.5
8389
8390 Returns a copy of this string repeated the specified number of \a times.
8391
8392 If \a times is less than 1, an empty string is returned.
8393
8394 Example:
8395
8396 \snippet code/src_corelib_text_qstring.cpp 8
8397*/
8398QString QString::repeated(qsizetype times) const
8399{
8400 if (d.size == 0)
8401 return *this;
8402
8403 if (times <= 1) {
8404 if (times == 1)
8405 return *this;
8406 return QString();
8407 }
8408
8409 const qsizetype resultSize = times * d.size;
8410
8411 QString result;
8412 result.reserve(asize: resultSize);
8413 if (result.capacity() != resultSize)
8414 return QString(); // not enough memory
8415
8416 memcpy(dest: result.d.data(), src: d.data(), n: d.size * sizeof(QChar));
8417
8418 qsizetype sizeSoFar = d.size;
8419 char16_t *end = result.d.data() + sizeSoFar;
8420
8421 const qsizetype halfResultSize = resultSize >> 1;
8422 while (sizeSoFar <= halfResultSize) {
8423 memcpy(dest: end, src: result.d.data(), n: sizeSoFar * sizeof(QChar));
8424 end += sizeSoFar;
8425 sizeSoFar <<= 1;
8426 }
8427 memcpy(dest: end, src: result.d.data(), n: (resultSize - sizeSoFar) * sizeof(QChar));
8428 result.d.data()[resultSize] = '\0';
8429 result.d.size = resultSize;
8430 return result;
8431}
8432
8433void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8434{
8435 {
8436 // check if it's fully ASCII first, because then we have no work
8437 auto start = reinterpret_cast<const char16_t *>(data->constData());
8438 const char16_t *p = start + from;
8439 if (isAscii_helper(ptr&: p, end: p + data->size() - from))
8440 return;
8441 if (p > start + from)
8442 from = p - start - 1; // need one before the non-ASCII to perform NFC
8443 }
8444
8445 if (version == QChar::Unicode_Unassigned) {
8446 version = QChar::currentUnicodeVersion();
8447 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8448 const QString &s = *data;
8449 QChar *d = nullptr;
8450 for (const NormalizationCorrection &n : uc_normalization_corrections) {
8451 if (n.version > version) {
8452 qsizetype pos = from;
8453 if (QChar::requiresSurrogates(ucs4: n.ucs4)) {
8454 char16_t ucs4High = QChar::highSurrogate(ucs4: n.ucs4);
8455 char16_t ucs4Low = QChar::lowSurrogate(ucs4: n.ucs4);
8456 char16_t oldHigh = QChar::highSurrogate(ucs4: n.old_mapping);
8457 char16_t oldLow = QChar::lowSurrogate(ucs4: n.old_mapping);
8458 while (pos < s.size() - 1) {
8459 if (s.at(i: pos).unicode() == ucs4High && s.at(i: pos + 1).unicode() == ucs4Low) {
8460 if (!d)
8461 d = data->data();
8462 d[pos] = QChar(oldHigh);
8463 d[++pos] = QChar(oldLow);
8464 }
8465 ++pos;
8466 }
8467 } else {
8468 while (pos < s.size()) {
8469 if (s.at(i: pos).unicode() == n.ucs4) {
8470 if (!d)
8471 d = data->data();
8472 d[pos] = QChar(n.old_mapping);
8473 }
8474 ++pos;
8475 }
8476 }
8477 }
8478 }
8479 }
8480
8481 if (normalizationQuickCheckHelper(str: data, mode, from, lastStable: &from))
8482 return;
8483
8484 decomposeHelper(str: data, canonical: mode < QString::NormalizationForm_KD, version, from);
8485
8486 canonicalOrderHelper(str: data, version, from);
8487
8488 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8489 return;
8490
8491 composeHelper(str: data, version, from);
8492}
8493
8494/*!
8495 Returns the string in the given Unicode normalization \a mode,
8496 according to the given \a version of the Unicode standard.
8497*/
8498QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8499{
8500 QString copy = *this;
8501 qt_string_normalize(data: &copy, mode, version, from: 0);
8502 return copy;
8503}
8504
8505#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8506static void checkArgEscape(QStringView s)
8507{
8508 // If we're in here, it means that qArgDigitValue has accepted the
8509 // digit. We can skip the check in case we already know it will
8510 // succeed.
8511 if (!supportUnicodeDigitValuesInArg())
8512 return;
8513
8514 const auto isNonAsciiDigit = [](QChar c) {
8515 return c.unicode() < u'0' || c.unicode() > u'9';
8516 };
8517
8518 if (std::any_of(first: s.begin(), last: s.end(), pred: isNonAsciiDigit)) {
8519 const auto accumulateDigit = [](int partial, QChar digit) {
8520 return partial * 10 + digit.digitValue();
8521 };
8522 const int parsedNumber = std::accumulate(first: s.begin(), last: s.end(), init: 0, binary_op: accumulateDigit);
8523
8524 qWarning(msg: "QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8525 " it is currently being interpreted as the %d-th substitution.\n"
8526 " This is deprecated; support for non-ASCII digits will be dropped\n"
8527 " in a future version of Qt.",
8528 qUtf16Printable(s.toString()),
8529 parsedNumber);
8530 }
8531}
8532#endif
8533
8534struct ArgEscapeData
8535{
8536 int min_escape; // lowest escape sequence number
8537 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8538 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8539 // contain 'L'
8540 qsizetype escape_len; // total length of escape sequences which will be replaced
8541};
8542
8543static ArgEscapeData findArgEscapes(QStringView s)
8544{
8545 const QChar *uc_begin = s.begin();
8546 const QChar *uc_end = s.end();
8547
8548 ArgEscapeData d;
8549
8550 d.min_escape = INT_MAX;
8551 d.occurrences = 0;
8552 d.escape_len = 0;
8553 d.locale_occurrences = 0;
8554
8555 const QChar *c = uc_begin;
8556 while (c != uc_end) {
8557 while (c != uc_end && c->unicode() != '%')
8558 ++c;
8559
8560 if (c == uc_end)
8561 break;
8562 const QChar *escape_start = c;
8563 if (++c == uc_end)
8564 break;
8565
8566 bool locale_arg = false;
8567 if (c->unicode() == 'L') {
8568 locale_arg = true;
8569 if (++c == uc_end)
8570 break;
8571 }
8572
8573 int escape = qArgDigitValue(ch: *c);
8574 if (escape == -1)
8575 continue;
8576
8577 // ### Qt 7: do not allow anything but ASCII digits
8578 // in arg()'s replacements.
8579#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8580 const QChar *escapeBegin = c;
8581 const QChar *escapeEnd = escapeBegin + 1;
8582#endif
8583
8584 ++c;
8585
8586 if (c != uc_end) {
8587 const int next_escape = qArgDigitValue(ch: *c);
8588 if (next_escape != -1) {
8589 escape = (10 * escape) + next_escape;
8590 ++c;
8591#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8592 ++escapeEnd;
8593#endif
8594 }
8595 }
8596
8597#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8598 checkArgEscape(s: QStringView(escapeBegin, escapeEnd));
8599#endif
8600
8601 if (escape > d.min_escape)
8602 continue;
8603
8604 if (escape < d.min_escape) {
8605 d.min_escape = escape;
8606 d.occurrences = 0;
8607 d.escape_len = 0;
8608 d.locale_occurrences = 0;
8609 }
8610
8611 ++d.occurrences;
8612 if (locale_arg)
8613 ++d.locale_occurrences;
8614 d.escape_len += c - escape_start;
8615 }
8616 return d;
8617}
8618
8619static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
8620 QStringView arg, QStringView larg, QChar fillChar)
8621{
8622 // Negative field-width for right-padding, positive for left-padding:
8623 const qsizetype abs_field_width = qAbs(t: field_width);
8624 const qsizetype result_len =
8625 s.size() - d.escape_len
8626 + (d.occurrences - d.locale_occurrences) * qMax(a: abs_field_width, b: arg.size())
8627 + d.locale_occurrences * qMax(a: abs_field_width, b: larg.size());
8628
8629 QString result(result_len, Qt::Uninitialized);
8630 QChar *rc = const_cast<QChar *>(result.unicode());
8631 QChar *const result_end = rc + result_len;
8632 qsizetype repl_cnt = 0;
8633
8634 const QChar *c = s.begin();
8635 const QChar *const uc_end = s.end();
8636 while (c != uc_end) {
8637 Q_ASSERT(d.occurrences > repl_cnt);
8638 /* We don't have to check increments of c against uc_end because, as
8639 long as d.occurrences > repl_cnt, we KNOW there are valid escape
8640 sequences remaining. */
8641
8642 const QChar *text_start = c;
8643 while (c->unicode() != '%')
8644 ++c;
8645
8646 const QChar *escape_start = c++;
8647 const bool localize = c->unicode() == 'L';
8648 if (localize)
8649 ++c;
8650
8651 int escape = qArgDigitValue(ch: *c);
8652 if (escape != -1 && c + 1 != uc_end) {
8653 const int digit = qArgDigitValue(ch: c[1]);
8654 if (digit != -1) {
8655 ++c;
8656 escape = 10 * escape + digit;
8657 }
8658 }
8659
8660 if (escape != d.min_escape) {
8661 memcpy(dest: rc, src: text_start, n: (c - text_start) * sizeof(QChar));
8662 rc += c - text_start;
8663 } else {
8664 ++c;
8665
8666 memcpy(dest: rc, src: text_start, n: (escape_start - text_start) * sizeof(QChar));
8667 rc += escape_start - text_start;
8668
8669 const QStringView use = localize ? larg : arg;
8670 const qsizetype pad_chars = abs_field_width - use.size();
8671 // (If negative, relevant loops are no-ops: no need to check.)
8672
8673 if (field_width > 0) { // left padded
8674 rc = std::fill_n(first: rc, n: pad_chars, value: fillChar);
8675 }
8676
8677 if (use.size())
8678 memcpy(dest: rc, src: use.data(), n: use.size() * sizeof(QChar));
8679 rc += use.size();
8680
8681 if (field_width < 0) { // right padded
8682 rc = std::fill_n(first: rc, n: pad_chars, value: fillChar);
8683 }
8684
8685 if (++repl_cnt == d.occurrences) {
8686 memcpy(dest: rc, src: c, n: (uc_end - c) * sizeof(QChar));
8687 rc += uc_end - c;
8688 Q_ASSERT(rc == result_end);
8689 c = uc_end;
8690 }
8691 }
8692 }
8693 Q_ASSERT(rc == result_end);
8694
8695 return result;
8696}
8697
8698/*!
8699 Returns a copy of this string with the lowest numbered place marker
8700 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8701
8702 \a fieldWidth specifies the minimum amount of space that argument \a
8703 a shall occupy. If \a a requires less space than \a fieldWidth, it
8704 is padded to \a fieldWidth with character \a fillChar. A positive
8705 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8706 produces left-aligned text.
8707
8708 This example shows how we might create a \c status string for
8709 reporting progress while processing a list of files:
8710
8711 \snippet qstring/main.cpp 11
8712
8713 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8714 %2. Finally, \c arg(fileName) replaces \c %3.
8715
8716 One advantage of using arg() over asprintf() is that the order of the
8717 numbered place markers can change, if the application's strings are
8718 translated into other languages, but each arg() will still replace
8719 the lowest numbered unreplaced place marker, no matter where it
    appears. Also, if place marker \c %i appears more than once in the
    string, arg() replaces all of them.
8722
8723 If there is no unreplaced place marker remaining, a warning message
8724 is output and the result is undefined. Place marker numbers must be
8725 in the range 1 to 99.
8726*/
8727QString QString::arg(const QString &a, int fieldWidth, QChar fillChar) const
8728{
8729 return arg(a: qToStringViewIgnoringNull(s: a), fieldWidth, fillChar);
8730}
8731
8732/*!
8733 \overload
8734 \since 5.10
8735
8736 Returns a copy of this string with the lowest-numbered place-marker
8737 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8738
8739 \a fieldWidth specifies the minimum amount of space that \a a
8740 shall occupy. If \a a requires less space than \a fieldWidth, it
8741 is padded to \a fieldWidth with character \a fillChar. A positive
8742 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8743 produces left-aligned text.
8744
8745 This example shows how we might create a \c status string for
8746 reporting progress while processing a list of files:
8747
8748 \snippet qstring/main.cpp 11-qstringview
8749
8750 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8751 %2. Finally, \c arg(fileName) replaces \c %3.
8752
8753 One advantage of using arg() over asprintf() is that the order of the
8754 numbered place markers can change, if the application's strings are
8755 translated into other languages, but each arg() will still replace
8756 the lowest-numbered unreplaced place-marker, no matter where it
8757 appears. Also, if place-marker \c %i appears more than once in the
8758 string, arg() replaces all of them.
8759
8760 If there is no unreplaced place-marker remaining, a warning message
8761 is printed and the result is undefined. Place-marker numbers must be
8762 in the range 1 to 99.
8763*/
8764QString QString::arg(QStringView a, int fieldWidth, QChar fillChar) const
8765{
8766 ArgEscapeData d = findArgEscapes(s: *this);
8767
8768 if (Q_UNLIKELY(d.occurrences == 0)) {
8769 qWarning(msg: "QString::arg: Argument missing: %ls, %ls", qUtf16Printable(*this),
8770 qUtf16Printable(a.toString()));
8771 return *this;
8772 }
8773 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg: a, larg: a, fillChar);
8774}
8775
8776/*!
8777 \overload
8778 \since 5.10
8779
8780 Returns a copy of this string with the lowest-numbered place-marker
8781 replaced by the Latin-1 string viewed by \a a, i.e., \c %1, \c %2, ..., \c %99.
8782
8783 \a fieldWidth specifies the minimum amount of space that \a a
8784 shall occupy. If \a a requires less space than \a fieldWidth, it
8785 is padded to \a fieldWidth with character \a fillChar. A positive
8786 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8787 produces left-aligned text.
8788
8789 One advantage of using arg() over asprintf() is that the order of the
8790 numbered place markers can change, if the application's strings are
8791 translated into other languages, but each arg() will still replace
8792 the lowest-numbered unreplaced place-marker, no matter where it
8793 appears. Also, if place-marker \c %i appears more than once in the
8794 string, arg() replaces all of them.
8795
8796 If there is no unreplaced place-marker remaining, a warning message
8797 is printed and the result is undefined. Place-marker numbers must be
8798 in the range 1 to 99.
8799*/
8800QString QString::arg(QLatin1StringView a, int fieldWidth, QChar fillChar) const
8801{
8802 QVarLengthArray<char16_t> utf16 = qt_from_latin1_to_qvla(str: a);
8803 return arg(a: QStringView(utf16.data(), utf16.size()), fieldWidth, fillChar);
8804}
8805
8806/*! \fn QString QString::arg(int a, int fieldWidth, int base, QChar fillChar) const
8807 \overload arg()
8808
8809 The \a a argument is expressed in base \a base, which is 10 by
8810 default and must be between 2 and 36. For bases other than 10, \a a
8811 is treated as an unsigned integer.
8812
8813 \a fieldWidth specifies the minimum amount of space that \a a is
8814 padded to and filled with the character \a fillChar. A positive
8815 value produces right-aligned text; a negative value produces
8816 left-aligned text.
8817
8818 The '%' can be followed by an 'L', in which case the sequence is
8819 replaced with a localized representation of \a a. The conversion
8820 uses the default locale, set by QLocale::setDefault(). If no default
8821 locale was specified, the system locale is used. The 'L' flag is
8822 ignored if \a base is not 10.
8823
8824 \snippet qstring/main.cpp 12
8825 \snippet qstring/main.cpp 14
8826
8827 \sa {Number Formats}
8828*/
8829
8830/*! \fn QString QString::arg(uint a, int fieldWidth, int base, QChar fillChar) const
8831 \overload arg()
8832
8833 The \a base argument specifies the base to use when converting the
8834 integer \a a into a string. The base must be between 2 and 36.
8835
8836 \sa {Number Formats}
8837*/
8838
8839/*! \fn QString QString::arg(long a, int fieldWidth, int base, QChar fillChar) const
8840 \overload arg()
8841
8842 \a fieldWidth specifies the minimum amount of space that \a a is
8843 padded to and filled with the character \a fillChar. A positive
8844 value produces right-aligned text; a negative value produces
8845 left-aligned text.
8846
8847 The \a a argument is expressed in the given \a base, which is 10 by
8848 default and must be between 2 and 36.
8849
8850 The '%' can be followed by an 'L', in which case the sequence is
8851 replaced with a localized representation of \a a. The conversion
8852 uses the default locale. The default locale is determined from the
8853 system's locale settings at application startup. It can be changed
8854 using QLocale::setDefault(). The 'L' flag is ignored if \a base is
8855 not 10.
8856
8857 \snippet qstring/main.cpp 12
8858 \snippet qstring/main.cpp 14
8859
8860 \sa {Number Formats}
8861*/
8862
8863/*!
8864 \fn QString QString::arg(ulong a, int fieldWidth, int base, QChar fillChar) const
8865 \overload arg()
8866
8867 \a fieldWidth specifies the minimum amount of space that \a a is
8868 padded to and filled with the character \a fillChar. A positive
8869 value produces right-aligned text; a negative value produces
8870 left-aligned text.
8871
8872 The \a base argument specifies the base to use when converting the
8873 integer \a a to a string. The base must be between 2 and 36, with 8
8874 giving octal, 10 decimal, and 16 hexadecimal numbers.
8875
8876 \sa {Number Formats}
8877*/
8878
8879/*!
8880 \overload arg()
8881
8882 \a fieldWidth specifies the minimum amount of space that \a a is
8883 padded to and filled with the character \a fillChar. A positive
8884 value produces right-aligned text; a negative value produces
8885 left-aligned text.
8886
8887 The \a base argument specifies the base to use when converting the
8888 integer \a a into a string. The base must be between 2 and 36, with
8889 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8890
8891 \sa {Number Formats}
8892*/
8893QString QString::arg(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8894{
8895 ArgEscapeData d = findArgEscapes(s: *this);
8896
8897 if (d.occurrences == 0) {
8898 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
8899 return *this;
8900 }
8901
8902 unsigned flags = QLocaleData::NoFlags;
8903 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8904 if (fillChar == u'0')
8905 flags = QLocaleData::ZeroPadded;
8906
8907 QString arg;
8908 if (d.occurrences > d.locale_occurrences) {
8909 arg = QLocaleData::c()->longLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8910 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8911 }
8912
8913 QString localeArg;
8914 if (d.locale_occurrences > 0) {
8915 QLocale locale;
8916 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8917 flags |= QLocaleData::GroupDigits;
8918 localeArg = locale.d->m_data->longLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8919 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8920 }
8921
8922 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
8923}
8924
8925/*!
8926 \overload arg()
8927
8928 \a fieldWidth specifies the minimum amount of space that \a a is
8929 padded to and filled with the character \a fillChar. A positive
8930 value produces right-aligned text; a negative value produces
8931 left-aligned text.
8932
8933 The \a base argument specifies the base to use when converting the
8934 integer \a a into a string. \a base must be between 2 and 36, with 8
8935 giving octal, 10 decimal, and 16 hexadecimal numbers.
8936
8937 \sa {Number Formats}
8938*/
8939QString QString::arg(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8940{
8941 ArgEscapeData d = findArgEscapes(s: *this);
8942
8943 if (d.occurrences == 0) {
8944 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
8945 return *this;
8946 }
8947
8948 unsigned flags = QLocaleData::NoFlags;
8949 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8950 if (fillChar == u'0')
8951 flags = QLocaleData::ZeroPadded;
8952
8953 QString arg;
8954 if (d.occurrences > d.locale_occurrences) {
8955 arg = QLocaleData::c()->unsLongLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8956 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8957 }
8958
8959 QString localeArg;
8960 if (d.locale_occurrences > 0) {
8961 QLocale locale;
8962 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8963 flags |= QLocaleData::GroupDigits;
8964 localeArg = locale.d->m_data->unsLongLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8965 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8966 }
8967
8968 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
8969}
8970
8971/*!
8972 \overload arg()
8973
8974 \fn QString QString::arg(short a, int fieldWidth, int base, QChar fillChar) const
8975
8976 \a fieldWidth specifies the minimum amount of space that \a a is
8977 padded to and filled with the character \a fillChar. A positive
8978 value produces right-aligned text; a negative value produces
8979 left-aligned text.
8980
8981 The \a base argument specifies the base to use when converting the
8982 integer \a a into a string. The base must be between 2 and 36, with
8983 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8984
8985 \sa {Number Formats}
8986*/
8987
8988/*!
8989 \fn QString QString::arg(ushort a, int fieldWidth, int base, QChar fillChar) const
8990 \overload arg()
8991
8992 \a fieldWidth specifies the minimum amount of space that \a a is
8993 padded to and filled with the character \a fillChar. A positive
8994 value produces right-aligned text; a negative value produces
8995 left-aligned text.
8996
8997 The \a base argument specifies the base to use when converting the
8998 integer \a a into a string. The base must be between 2 and 36, with
8999 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
9000
9001 \sa {Number Formats}
9002*/
9003
9004/*!
9005 \overload arg()
9006*/
9007QString QString::arg(QChar a, int fieldWidth, QChar fillChar) const
9008{
9009 return arg(a: QStringView{&a, 1}, fieldWidth, fillChar);
9010}
9011
9012/*!
9013 \overload arg()
9014
9015 The \a a argument is interpreted as a Latin-1 character.
9016*/
9017QString QString::arg(char a, int fieldWidth, QChar fillChar) const
9018{
9019 return arg(a: QLatin1Char(a), fieldWidth, fillChar);
9020}
9021
9022/*!
9023 \overload arg()
9024
9025 Argument \a a is formatted according to the specified \a format and
9026 \a precision. See \l{Floating-point Formats} for details.
9027
9028 \a fieldWidth specifies the minimum amount of space that \a a is
9029 padded to and filled with the character \a fillChar. A positive
9030 value produces right-aligned text; a negative value produces
9031 left-aligned text.
9032
9033 \snippet code/src_corelib_text_qstring.cpp 2
9034
9035 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
9036*/
9037QString QString::arg(double a, int fieldWidth, char format, int precision, QChar fillChar) const
9038{
9039 ArgEscapeData d = findArgEscapes(s: *this);
9040
9041 if (d.occurrences == 0) {
9042 qWarning(msg: "QString::arg: Argument missing: %s, %g", toLocal8Bit().data(), a);
9043 return *this;
9044 }
9045
9046 unsigned flags = QLocaleData::NoFlags;
9047 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
9048 if (fillChar == u'0')
9049 flags |= QLocaleData::ZeroPadded;
9050
9051 if (isAsciiUpper(c: format))
9052 flags |= QLocaleData::CapitalEorX;
9053
9054 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
9055 switch (QtMiscUtils::toAsciiLower(ch: format)) {
9056 case 'f':
9057 form = QLocaleData::DFDecimal;
9058 break;
9059 case 'e':
9060 form = QLocaleData::DFExponent;
9061 break;
9062 case 'g':
9063 form = QLocaleData::DFSignificantDigits;
9064 break;
9065 default:
9066#if defined(QT_CHECK_RANGE)
9067 qWarning("QString::arg: Invalid format char '%c'", format);
9068#endif
9069 break;
9070 }
9071
9072 QString arg;
9073 if (d.occurrences > d.locale_occurrences) {
9074 arg = QLocaleData::c()->doubleToString(d: a, precision, form, width: fieldWidth,
9075 flags: flags | QLocaleData::ZeroPadExponent);
9076 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9077 || fieldWidth <= arg.size());
9078 }
9079
9080 QString localeArg;
9081 if (d.locale_occurrences > 0) {
9082 QLocale locale;
9083
9084 const QLocale::NumberOptions numberOptions = locale.numberOptions();
9085 if (!(numberOptions & QLocale::OmitGroupSeparator))
9086 flags |= QLocaleData::GroupDigits;
9087 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
9088 flags |= QLocaleData::ZeroPadExponent;
9089 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
9090 flags |= QLocaleData::AddTrailingZeroes;
9091 localeArg = locale.d->m_data->doubleToString(d: a, precision, form, width: fieldWidth, flags);
9092 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9093 || fieldWidth <= localeArg.size());
9094 }
9095
9096 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
9097}
9098
9099static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9100static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9101
9102template <typename Char>
9103static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9104{
9105 qsizetype i = *pos;
9106 ++i;
9107 if (i < len && uc[i] == u'L')
9108 ++i;
9109 if (i < len) {
9110 int escape = to_unicode(uc[i]) - '0';
9111 if (uint(escape) >= 10U)
9112 return -1;
9113 ++i;
9114 if (i < len) {
9115 // there's a second digit
9116 int digit = to_unicode(uc[i]) - '0';
9117 if (uint(digit) < 10U) {
9118 escape = (escape * 10) + digit;
9119 ++i;
9120 }
9121 }
9122 *pos = i;
9123 return escape;
9124 }
9125 return -1;
9126}
9127
9128/*
9129 Algorithm for multiArg:
9130
   1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9132 The L is parsed and accepted for compatibility with non-multi-arg, but since
9133 multiArg only accepts strings as replacements, the localization request can
9134 be safely ignored.
9135 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9136 either points at text to be copied verbatim (in which case the int is -1),
9137 or, initially, at the textual representation of the placeholder. In that case,
9138 the int contains the numerical number as parsed from the placeholder.
9139 3. Next, collect all the non-negative ints found, sort them in ascending order and
9140 remove duplicates.
9141 3a. If the result has more entries than multiArg() was given replacement strings,
9142 we have found placeholders we can't satisfy with replacement strings. That is
9143 fine (there could be another .arg() call coming after this one), so just
9144 truncate the result to the number of actual multiArg() replacement strings.
   3b. If the result has fewer entries than multiArg() was given replacement strings,
9146 the string is missing placeholders. This is an error that the user should be
9147 warned about.
   4. The result of step (3) is a mapping from the index of any replacement string to
      placeholder number. This is the wrong way around, but since placeholder
      numbers can be as large as 99, while we typically don't have more than 9
      replacement strings, we avoid a sparsely-used table indexed by placeholder
      number and instead do a reverse lookup each time we need to map a placeholder
      number to a replacement string index (that's a linear search; but still *much*
      faster than using an associative container).
9154 5. Next, for each of the tuples found in step (1), do the following:
9155 5a. If the int is negative, do nothing.
9156 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9157 the string-ref with a string-ref for the (complete) I'th replacement string.
9158 5c. Otherwise, do nothing.
9159 6. Concatenate all string refs into a single result string.
9160*/
9161
9162namespace {
9163struct Part
9164{
9165 Part() = default; // for QVarLengthArray; do not use
9166 constexpr Part(QAnyStringView s, int num = -1)
9167 : string{s}, number{num} {}
9168
9169 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9170
9171 QAnyStringView string;
9172 int number;
9173};
9174} // unnamed namespace
9175
9176Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE);
9177
9178namespace {
9179
9180enum { ExpectedParts = 32 };
9181
9182typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9183typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9184
9185template <typename StringView>
9186static ParseResult parseMultiArgFormatString(StringView s)
9187{
9188 ParseResult result;
9189
9190 const auto uc = s.data();
9191 const auto len = s.size();
9192 const auto end = len - 1;
9193 qsizetype i = 0;
9194 qsizetype last = 0;
9195
9196 while (i < end) {
9197 if (uc[i] == u'%') {
9198 qsizetype percent = i;
9199 int number = getEscape(uc, &i, len);
9200 if (number != -1) {
9201 if (last != percent)
9202 result.push_back(t: Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9203 result.push_back(t: Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9204 last = i;
9205 continue;
9206 }
9207 }
9208 ++i;
9209 }
9210
9211 if (last < len)
9212 result.push_back(t: Part{s.sliced(last, len - last)}); // trailing literal text
9213
9214 return result;
9215}
9216
9217static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9218{
9219 ArgIndexToPlaceholderMap result;
9220
9221 for (const Part &part : parts) {
9222 if (part.number >= 0)
9223 result.push_back(t: part.number);
9224 }
9225
9226 std::sort(first: result.begin(), last: result.end());
9227 result.erase(abegin: std::unique(first: result.begin(), last: result.end()),
9228 aend: result.end());
9229
9230 return result;
9231}
9232
9233static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9234{
9235 using namespace QtPrivate;
9236 qsizetype totalSize = 0;
9237 for (Part &part : parts) {
9238 if (part.number != -1) {
9239 const auto it = std::find(first: argIndexToPlaceholderMap.begin(), last: argIndexToPlaceholderMap.end(), val: part.number);
9240 if (it != argIndexToPlaceholderMap.end()) {
9241 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9242 switch (arg.tag) {
9243 case ArgBase::L1:
9244 part.reset(s: static_cast<const QLatin1StringArg&>(arg).string);
9245 break;
9246 case ArgBase::U8:
9247 Q_UNREACHABLE(); // waiting for QUtf8String...
9248 break;
9249 case ArgBase::U16:
9250 part.reset(s: static_cast<const QStringViewArg&>(arg).string);
9251 break;
9252 }
9253 }
9254 }
9255 totalSize += part.string.size();
9256 }
9257 return totalSize;
9258}
9259
9260} // unnamed namespace
9261
9262template <typename StringView>
9263static QString argToQStringImpl(StringView pattern, size_t numArgs, const QtPrivate::ArgBase **args)
9264{
9265 // Step 1-2 above
9266 ParseResult parts = parseMultiArgFormatString(pattern);
9267
9268 // 3-4
9269 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9270
9271 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9272 argIndexToPlaceholderMap.resize(sz: qsizetype(numArgs));
9273 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9274 qWarning(msg: "QString::arg: %d argument(s) missing in %ls",
9275 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9276
9277 // 5
9278 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9279
9280 // 6:
9281 QString result(totalSize, Qt::Uninitialized);
9282 auto out = const_cast<QChar*>(result.constData());
9283
9284 struct Concatenate {
9285 QChar *out;
9286 QChar *operator()(QLatin1String part) noexcept
9287 {
9288 if (part.size()) {
9289 qt_from_latin1(dst: reinterpret_cast<char16_t*>(out),
9290 str: part.data(), size: part.size());
9291 }
9292 return out + part.size();
9293 }
9294 QChar *operator()(QUtf8StringView part) noexcept
9295 {
9296 return QUtf8::convertToUnicode(buffer: out, in: part);
9297 }
9298 QChar *operator()(QStringView part) noexcept
9299 {
9300 if (part.size())
9301 memcpy(dest: out, src: part.data(), n: part.size() * sizeof(QChar));
9302 return out + part.size();
9303 }
9304 };
9305
9306 for (const Part &part : parts)
9307 out = part.string.visit(Concatenate{out});
9308
9309 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9310 result.truncate(pos: out - result.cbegin());
9311
9312 return result;
9313}
9314
9315QString QtPrivate::argToQString(QStringView pattern, size_t n, const ArgBase **args)
9316{
9317 return argToQStringImpl(pattern, numArgs: n, args);
9318}
9319
9320QString QtPrivate::argToQString(QLatin1StringView pattern, size_t n, const ArgBase **args)
9321{
9322 return argToQStringImpl(pattern, numArgs: n, args);
9323}
9324
9325/*! \fn bool QString::isRightToLeft() const
9326
9327 Returns \c true if the string is read right to left.
9328
9329 \sa QStringView::isRightToLeft()
9330*/
9331bool QString::isRightToLeft() const
9332{
9333 return QtPrivate::isRightToLeft(string: QStringView(*this));
9334}
9335
9336/*!
9337 \fn bool QString::isValidUtf16() const noexcept
9338 \since 5.15
9339
9340 Returns \c true if the string contains valid UTF-16 encoded data,
9341 or \c false otherwise.
9342
9343 Note that this function does not perform any special validation of the
9344 data; it merely checks if it can be successfully decoded from UTF-16.
9345 The data is assumed to be in host byte order; the presence of a BOM
9346 is meaningless.
9347
9348 \sa QStringView::isValidUtf16()
9349*/
9350
9351/*! \fn QChar *QString::data()
9352
9353 Returns a pointer to the data stored in the QString. The pointer
9354 can be used to access and modify the characters that compose the
9355 string.
9356
9357 Unlike constData() and unicode(), the returned data is always
9358 '\\0'-terminated.
9359
9360 Example:
9361
9362 \snippet qstring/main.cpp 19
9363
9364 Note that the pointer remains valid only as long as the string is
9365 not modified by other means. For read-only access, constData() is
9366 faster because it never causes a \l{deep copy} to occur.
9367
9368 \sa constData(), operator[]()
9369*/
9370
9371/*! \fn const QChar *QString::data() const
9372
9373 \overload
9374
9375 \note The returned string may not be '\\0'-terminated.
9376 Use size() to determine the length of the array.
9377
9378 \sa fromRawData()
9379*/
9380
9381/*! \fn const QChar *QString::constData() const
9382
9383 Returns a pointer to the data stored in the QString. The pointer
9384 can be used to access the characters that compose the string.
9385
9386 Note that the pointer remains valid only as long as the string is
9387 not modified.
9388
9389 \note The returned string may not be '\\0'-terminated.
9390 Use size() to determine the length of the array.
9391
9392 \sa data(), operator[](), fromRawData()
9393*/
9394
9395/*! \fn void QString::push_front(const QString &other)
9396
9397 This function is provided for STL compatibility, prepending the
9398 given \a other string to the beginning of this string. It is
9399 equivalent to \c prepend(other).
9400
9401 \sa prepend()
9402*/
9403
9404/*! \fn void QString::push_front(QChar ch)
9405
9406 \overload
9407
9408 Prepends the given \a ch character to the beginning of this string.
9409*/
9410
9411/*! \fn void QString::push_back(const QString &other)
9412
9413 This function is provided for STL compatibility, appending the
9414 given \a other string onto the end of this string. It is
9415 equivalent to \c append(other).
9416
9417 \sa append()
9418*/
9419
9420/*! \fn void QString::push_back(QChar ch)
9421
9422 \overload
9423
9424 Appends the given \a ch character onto the end of this string.
9425*/
9426
9427/*!
9428 \since 6.1
9429
9430 Removes from the string the characters in the half-open range
9431 [ \a first , \a last ). Returns an iterator to the character
9432 immediately after the last erased character (i.e. the character
9433 referred to by \a last before the erase).
9434*/
9435QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9436{
9437 const auto start = std::distance(first: cbegin(), last: first);
9438 const auto len = std::distance(first: first, last: last);
9439 remove(pos: start, len);
9440 return begin() + start;
9441}
9442
9443/*!
9444 \fn QString::iterator QString::erase(QString::const_iterator it)
9445
9446 \overload
9447 \since 6.5
9448
9449 Removes the character denoted by \c it from the string.
9450 Returns an iterator to the character immediately after the
9451 erased character.
9452
9453 \code
9454 QString c = "abcdefg";
9455 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9456 \endcode
9457*/
9458
9459/*! \fn void QString::shrink_to_fit()
9460 \since 5.10
9461
9462 This function is provided for STL compatibility. It is
9463 equivalent to squeeze().
9464
9465 \sa squeeze()
9466*/
9467
9468/*!
9469 \fn std::string QString::toStdString() const
9470
9471 Returns a std::string object with the data contained in this
9472 QString. The Unicode data is converted into 8-bit characters using
9473 the toUtf8() function.
9474
9475 This method is mostly useful to pass a QString to a function
9476 that accepts a std::string object.
9477
9478 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9479*/
9480
9481/*!
9482 Constructs a QString that uses the first \a size Unicode characters
9483 in the array \a unicode. The data in \a unicode is \e not
9484 copied. The caller must be able to guarantee that \a unicode will
9485 not be deleted or modified as long as the QString (or an
9486 unmodified copy of it) exists.
9487
9488 Any attempts to modify the QString or copies of it will cause it
9489 to create a deep copy of the data, ensuring that the raw data
9490 isn't modified.
9491
    Here is an example of how we can use a QRegularExpression on raw data in
    memory without having to copy the data into a QString:
9494
9495 \snippet qstring/main.cpp 22
9496 \snippet qstring/main.cpp 23
9497
9498 \warning A string created with fromRawData() is \e not
9499 '\\0'-terminated, unless the raw data contains a '\\0' character
9500 at position \a size. This means unicode() will \e not return a
9501 '\\0'-terminated string (although utf16() does, at the cost of
9502 copying the raw data).
9503
9504 \sa fromUtf16(), setRawData()
9505*/
9506QString QString::fromRawData(const QChar *unicode, qsizetype size)
9507{
9508 return QString(DataPointer::fromRawData(rawData: const_cast<char16_t *>(reinterpret_cast<const char16_t *>(unicode)), length: size));
9509}
9510
9511/*!
9512 \since 4.7
9513
9514 Resets the QString to use the first \a size Unicode characters
9515 in the array \a unicode. The data in \a unicode is \e not
9516 copied. The caller must be able to guarantee that \a unicode will
9517 not be deleted or modified as long as the QString (or an
9518 unmodified copy of it) exists.
9519
    This function can be used instead of fromRawData() to re-use
    existing QString objects to save memory re-allocations.
9522
9523 \sa fromRawData()
9524*/
9525QString &QString::setRawData(const QChar *unicode, qsizetype size)
9526{
9527 if (!unicode || !size) {
9528 clear();
9529 }
9530 *this = fromRawData(unicode, size);
9531 return *this;
9532}
9533
9534/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9535 \since 5.5
9536
9537 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9538
9539 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9540*/
9541
9542/*!
9543 \fn std::u16string QString::toStdU16String() const
9544 \since 5.5
9545
9546 Returns a std::u16string object with the data contained in this
9547 QString. The Unicode data is the same as returned by the utf16()
9548 method.
9549
9550 \sa utf16(), toStdWString(), toStdU32String()
9551*/
9552
9553/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9554 \since 5.5
9555
9556 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9557
9558 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9559*/
9560
9561/*!
9562 \fn std::u32string QString::toStdU32String() const
9563 \since 5.5
9564
9565 Returns a std::u32string object with the data contained in this
9566 QString. The Unicode data is the same as returned by the toUcs4()
9567 method.
9568
9569 \sa toUcs4(), toStdWString(), toStdU16String()
9570*/
9571
9572#if !defined(QT_NO_DATASTREAM)
9573/*!
9574 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9575 \relates QString
9576
9577 Writes the given \a string to the specified \a stream.
9578
9579 \sa {Serializing Qt Data Types}
9580*/
9581
9582QDataStream &operator<<(QDataStream &out, const QString &str)
9583{
9584 if (out.version() == 1) {
9585 out << str.toLatin1();
9586 } else {
9587 if (!str.isNull() || out.version() < 3) {
9588 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9589 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9590 len: static_cast<qsizetype>(sizeof(QChar) * str.size()));
9591 } else {
9592 QVarLengthArray<char16_t> buffer(str.size());
9593 qbswap<sizeof(char16_t)>(source: str.constData(), count: str.size(), dest: buffer.data());
9594 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9595 len: static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9596 }
9597 } else {
9598 QDataStream::writeQSizeType(s&: out, value: -1); // write null marker
9599 }
9600 }
9601 return out;
9602}
9603
9604/*!
9605 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9606 \relates QString
9607
9608 Reads a string from the specified \a stream into the given \a string.
9609
9610 \sa {Serializing Qt Data Types}
9611*/
9612
9613QDataStream &operator>>(QDataStream &in, QString &str)
9614{
9615 if (in.version() == 1) {
9616 QByteArray l;
9617 in >> l;
9618 str = QString::fromLatin1(ba: l);
9619 } else {
9620 qint64 size = QDataStream::readQSizeType(s&: in);
9621 qsizetype bytes = size;
9622 if (size != bytes || size < -1) {
9623 str.clear();
9624 in.setStatus(QDataStream::SizeLimitExceeded);
9625 return in;
9626 }
9627 if (bytes == -1) { // null string
9628 str = QString();
9629 } else if (bytes > 0) {
9630 if (bytes & 0x1) {
9631 str.clear();
9632 in.setStatus(QDataStream::ReadCorruptData);
9633 return in;
9634 }
9635
9636 const qsizetype Step = 1024 * 1024;
9637 qsizetype len = bytes / 2;
9638 qsizetype allocated = 0;
9639
9640 while (allocated < len) {
9641 int blockSize = qMin(a: Step, b: len - allocated);
9642 str.resize(size: allocated + blockSize);
9643 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9644 len: blockSize * 2) != blockSize * 2) {
9645 str.clear();
9646 in.setStatus(QDataStream::ReadPastEnd);
9647 return in;
9648 }
9649 allocated += blockSize;
9650 }
9651
9652 if ((in.byteOrder() == QDataStream::BigEndian)
9653 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9654 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9655 qbswap<sizeof(*data)>(source: data, count: len, dest: data);
9656 }
9657 } else {
9658 str = QString(QLatin1StringView(""));
9659 }
9660 }
9661 return in;
9662}
9663#endif // QT_NO_DATASTREAM
9664
9665/*!
9666 \typedef QString::Data
9667 \internal
9668*/
9669
9670/*!
9671 \typedef QString::DataPtr
9672 \internal
9673*/
9674
9675/*!
9676 \fn DataPtr & QString::data_ptr()
9677 \internal
9678*/
9679
9680/*!
9681 \since 5.11
9682 \internal
9683 \relates QStringView
9684
9685 Returns \c true if the string is read right to left.
9686
9687 \sa QString::isRightToLeft()
9688*/
9689bool QtPrivate::isRightToLeft(QStringView string) noexcept
9690{
9691 int isolateLevel = 0;
9692
9693 for (QStringIterator i(string); i.hasNext();) {
9694 const char32_t c = i.next();
9695
9696 switch (QChar::direction(ucs4: c)) {
9697 case QChar::DirRLI:
9698 case QChar::DirLRI:
9699 case QChar::DirFSI:
9700 ++isolateLevel;
9701 break;
9702 case QChar::DirPDI:
9703 if (isolateLevel)
9704 --isolateLevel;
9705 break;
9706 case QChar::DirL:
9707 if (isolateLevel)
9708 break;
9709 return false;
9710 case QChar::DirR:
9711 case QChar::DirAL:
9712 if (isolateLevel)
9713 break;
9714 return true;
9715 case QChar::DirEN:
9716 case QChar::DirES:
9717 case QChar::DirET:
9718 case QChar::DirAN:
9719 case QChar::DirCS:
9720 case QChar::DirB:
9721 case QChar::DirS:
9722 case QChar::DirWS:
9723 case QChar::DirON:
9724 case QChar::DirLRE:
9725 case QChar::DirLRO:
9726 case QChar::DirRLE:
9727 case QChar::DirRLO:
9728 case QChar::DirPDF:
9729 case QChar::DirNSM:
9730 case QChar::DirBN:
9731 break;
9732 }
9733 }
9734 return false;
9735}
9736
9737qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9738{
9739 qsizetype num = 0;
9740 qsizetype i = -1;
9741 if (haystack.size() > 500 && needle.size() > 5) {
9742 QStringMatcher matcher(needle, cs);
9743 while ((i = matcher.indexIn(str: haystack, from: i + 1)) != -1)
9744 ++num;
9745 } else {
9746 while ((i = QtPrivate::findString(haystack, from: i + 1, needle, cs)) != -1)
9747 ++num;
9748 }
9749 return num;
9750}
9751
9752qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9753{
9754 if (cs == Qt::CaseSensitive)
9755 return std::count(first: haystack.cbegin(), last: haystack.cend(), value: needle);
9756
9757 needle = foldCase(ch: needle);
9758 return std::count_if(first: haystack.cbegin(), last: haystack.cend(),
9759 pred: [needle](const QChar c) { return foldAndCompare(a: c, b: needle); });
9760}
9761
9762qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9763{
9764 qsizetype num = 0;
9765 qsizetype i = -1;
9766
9767 QLatin1StringMatcher matcher(needle, cs);
9768 while ((i = matcher.indexIn(haystack, from: i + 1)) != -1)
9769 ++num;
9770
9771 return num;
9772}
9773
9774qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9775{
9776 if (haystack.size() < needle.size())
9777 return 0;
9778
9779 if (!QtPrivate::isLatin1(s: needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9780 return 0;
9781
9782 qsizetype num = 0;
9783 qsizetype i = -1;
9784
9785 QVarLengthArray<uchar> s(needle.size());
9786 qt_to_latin1_unchecked(dst: s.data(), src: needle.utf16(), length: needle.size());
9787
9788 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9789 cs);
9790 while ((i = matcher.indexIn(haystack, from: i + 1)) != -1)
9791 ++num;
9792
9793 return num;
9794}
9795
9796qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9797{
9798 if (haystack.size() < needle.size())
9799 return -1;
9800
9801 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(str: needle);
9802 return QtPrivate::count(haystack, needle: QStringView(s.data(), s.size()), cs);
9803}
9804
9805qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9806{
9807 // non-L1 needles cannot possibly match in L1-only haystacks
9808 if (needle.unicode() > 0xff)
9809 return 0;
9810
9811 if (cs == Qt::CaseSensitive) {
9812 return std::count(first: haystack.cbegin(), last: haystack.cend(), value: needle.toLatin1());
9813 } else {
9814 return std::count_if(first: haystack.cbegin(), last: haystack.cend(),
9815 pred: CaseInsensitiveL1::matcher(ch: needle.toLatin1()));
9816 }
9817}
9818
9819/*!
9820 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9821 \since 5.10
9822 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9823 \since 5.10
9824 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9825 \since 5.10
9826 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9827 \since 5.10
9828 \internal
9829 \relates QStringView
9830
9831 Returns \c true if \a haystack starts with \a needle,
9832 otherwise returns \c false.
9833
9834 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9835
9836 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9837*/
9838
bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QStringView haystack, QStringView needle: forwards all arguments
    // unchanged to qt_starts_with_impl().
    return qt_starts_with_impl(haystack, needle, cs);
}
9843
bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QStringView haystack, QLatin1StringView needle: forwards all arguments
    // unchanged to qt_starts_with_impl().
    return qt_starts_with_impl(haystack, needle, cs);
}
9848
bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QLatin1StringView haystack, QStringView needle: forwards all arguments
    // unchanged to qt_starts_with_impl().
    return qt_starts_with_impl(haystack, needle, cs);
}
9853
bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QLatin1StringView haystack and needle: forwards all arguments
    // unchanged to qt_starts_with_impl().
    return qt_starts_with_impl(haystack, needle, cs);
}
9858
9859/*!
9860 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9861 \since 5.10
9862 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9863 \since 5.10
9864 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9865 \since 5.10
9866 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9867 \since 5.10
9868 \internal
9869 \relates QStringView
9870
9871 Returns \c true if \a haystack ends with \a needle,
9872 otherwise returns \c false.
9873
9874 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9875
    \sa QtPrivate::startsWith(), QString::startsWith(), QStringView::startsWith(), QLatin1StringView::startsWith()
9877*/
9878
bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QStringView haystack, QStringView needle: forwards all arguments
    // unchanged to qt_ends_with_impl().
    return qt_ends_with_impl(haystack, needle, cs);
}
9883
bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QStringView haystack, QLatin1StringView needle: forwards all arguments
    // unchanged to qt_ends_with_impl().
    return qt_ends_with_impl(haystack, needle, cs);
}
9888
bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QLatin1StringView haystack, QStringView needle: forwards all arguments
    // unchanged to qt_ends_with_impl().
    return qt_ends_with_impl(haystack, needle, cs);
}
9893
bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
{
    // QLatin1StringView haystack and needle: forwards all arguments
    // unchanged to qt_ends_with_impl().
    return qt_ends_with_impl(haystack, needle, cs);
}
9898
9899qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9900{
9901 const qsizetype l = haystack0.size();
9902 const qsizetype sl = needle0.size();
9903 if (sl == 1)
9904 return findString(str: haystack0, from, ch: needle0[0], cs);
9905 if (from < 0)
9906 from += l;
9907 if (std::size_t(sl + from) > std::size_t(l))
9908 return -1;
9909 if (!sl)
9910 return from;
9911 if (!l)
9912 return -1;
9913
9914 /*
9915 We use the Boyer-Moore algorithm in cases where the overhead
9916 for the skip table should pay off, otherwise we use a simple
9917 hash function.
9918 */
9919 if (l > 500 && sl > 5)
9920 return qFindStringBoyerMoore(haystack: haystack0, from, needle: needle0, cs);
9921
9922 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9923 /*
9924 We use some hashing for efficiency's sake. Instead of
9925 comparing strings, we compare the hash value of str with that
9926 of a part of this QString. Only if that matches, we call
9927 qt_string_compare().
9928 */
9929 const char16_t *needle = needle0.utf16();
9930 const char16_t *haystack = haystack0.utf16() + from;
9931 const char16_t *end = haystack0.utf16() + (l - sl);
9932 const qregisteruint sl_minus_1 = sl - 1;
9933 qregisteruint hashNeedle = 0, hashHaystack = 0;
9934 qsizetype idx;
9935
9936 if (cs == Qt::CaseSensitive) {
9937 for (idx = 0; idx < sl; ++idx) {
9938 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9939 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9940 }
9941 hashHaystack -= haystack[sl_minus_1];
9942
9943 while (haystack <= end) {
9944 hashHaystack += haystack[sl_minus_1];
9945 if (hashHaystack == hashNeedle
9946 && QtPrivate::compareStrings(lhs: needle0, rhs: sv(haystack), cs: Qt::CaseSensitive) == 0)
9947 return haystack - haystack0.utf16();
9948
9949 REHASH(*haystack);
9950 ++haystack;
9951 }
9952 } else {
9953 const char16_t *haystack_start = haystack0.utf16();
9954 for (idx = 0; idx < sl; ++idx) {
9955 hashNeedle = (hashNeedle<<1) + foldCase(ch: needle + idx, start: needle);
9956 hashHaystack = (hashHaystack<<1) + foldCase(ch: haystack + idx, start: haystack_start);
9957 }
9958 hashHaystack -= foldCase(ch: haystack + sl_minus_1, start: haystack_start);
9959
9960 while (haystack <= end) {
9961 hashHaystack += foldCase(ch: haystack + sl_minus_1, start: haystack_start);
9962 if (hashHaystack == hashNeedle
9963 && QtPrivate::compareStrings(lhs: needle0, rhs: sv(haystack), cs: Qt::CaseInsensitive) == 0)
9964 return haystack - haystack0.utf16();
9965
9966 REHASH(foldCase(haystack, haystack_start));
9967 ++haystack;
9968 }
9969 }
9970 return -1;
9971}
9972
9973qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9974{
9975 if (haystack.size() < needle.size())
9976 return -1;
9977
9978 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(str: needle);
9979 return QtPrivate::findString(haystack0: haystack, from, needle0: QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9980}
9981
9982qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9983{
9984 if (haystack.size() < needle.size())
9985 return -1;
9986
9987 if (!QtPrivate::isLatin1(s: needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9988 return -1;
9989
9990 if (needle.size() == 1) {
9991 const char n = needle.front().toLatin1();
9992 return QtPrivate::findString(haystack, from, needle: QLatin1StringView(&n, 1), cs);
9993 }
9994
9995 QVarLengthArray<char> s(needle.size());
9996 qt_to_latin1_unchecked(dst: reinterpret_cast<uchar *>(s.data()), src: needle.utf16(), length: needle.size());
9997 return QtPrivate::findString(haystack, from, needle: QLatin1StringView(s.data(), s.size()), cs);
9998}
9999
10000qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10001{
10002 if (from < 0)
10003 from += haystack.size();
10004 if (from < 0)
10005 return -1;
10006 qsizetype adjustedSize = haystack.size() - from;
10007 if (adjustedSize < needle.size())
10008 return -1;
10009 if (needle.size() == 0)
10010 return from;
10011
10012 if (cs == Qt::CaseSensitive) {
10013
10014 if (needle.size() == 1) {
10015 Q_ASSERT(haystack.data() != nullptr); // see size check above
10016 if (auto it = memchr(s: haystack.data() + from, c: needle.front().toLatin1(), n: adjustedSize))
10017 return static_cast<const char *>(it) - haystack.data();
10018 return -1;
10019 }
10020
10021 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
10022 return matcher.indexIn(haystack, from);
10023 }
10024
10025 // If the needle is sufficiently small we simply iteratively search through
10026 // the haystack. When the needle is too long we use a boyer-moore searcher
10027 // from the standard library, if available. If it is not available then the
10028 // QLatin1Strings are converted to QString and compared as such. Though
10029 // initialization is slower the boyer-moore search it employs still makes up
10030 // for it when haystack and needle are sufficiently long.
10031 // The needle size was chosen by testing various lengths using the
10032 // qstringtokenizer benchmark with the
10033 // "tokenize_qlatin1string_qlatin1string" test.
10034#ifdef Q_CC_MSVC
10035 const qsizetype threshold = 1;
10036#else
10037 const qsizetype threshold = 13;
10038#endif
10039 if (needle.size() <= threshold) {
10040 const auto begin = haystack.begin();
10041 const auto end = haystack.end() - needle.size() + 1;
10042 auto ciMatch = CaseInsensitiveL1::matcher(ch: needle[0].toLatin1());
10043 const qsizetype nlen1 = needle.size() - 1;
10044 for (auto it = std::find_if(first: begin + from, last: end, pred: ciMatch); it != end;
10045 it = std::find_if(first: it + 1, last: end, pred: ciMatch)) {
10046 // In this comparison we skip the first character because we know it's a match
10047 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(other: needle.sliced(pos: 1), cs) == 0)
10048 return std::distance(first: begin, last: it);
10049 }
10050 return -1;
10051 }
10052
10053 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
10054 return matcher.indexIn(haystack, from);
10055}
10056
10057qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
10058{
10059 return qLastIndexOf(haystack, needle: QChar(needle), from, cs);
10060}
10061
10062qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10063{
10064 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10065}
10066
10067qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10068{
10069 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10070}
10071
10072qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10073{
10074 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10075}
10076
10077qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10078{
10079 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10080}
10081
10082#if QT_CONFIG(regularexpression)
10083qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10084{
10085 if (!re.isValid()) {
10086 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::indexOf");
10087 return -1;
10088 }
10089
10090 QRegularExpressionMatch match = stringHaystack
10091 ? re.match(subject: *stringHaystack, offset: from)
10092 : re.matchView(subjectView: viewHaystack, offset: from);
10093 if (match.hasMatch()) {
10094 const qsizetype ret = match.capturedStart();
10095 if (rmatch)
10096 *rmatch = std::move(match);
10097 return ret;
10098 }
10099
10100 return -1;
10101}
10102
10103qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10104{
10105 return indexOf(viewHaystack: haystack, stringHaystack: nullptr, re, from, rmatch);
10106}
10107
10108qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10109{
10110 if (!re.isValid()) {
10111 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::lastIndexOf");
10112 return -1;
10113 }
10114
10115 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10116 QRegularExpressionMatchIterator iterator = stringHaystack
10117 ? re.globalMatch(subject: *stringHaystack)
10118 : re.globalMatchView(subjectView: viewHaystack);
10119 qsizetype lastIndex = -1;
10120 while (iterator.hasNext()) {
10121 QRegularExpressionMatch match = iterator.next();
10122 qsizetype start = match.capturedStart();
10123 if (start < endpos) {
10124 lastIndex = start;
10125 if (rmatch)
10126 *rmatch = std::move(match);
10127 } else {
10128 break;
10129 }
10130 }
10131
10132 return lastIndex;
10133}
10134
10135qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10136{
10137 return lastIndexOf(viewHaystack: haystack, stringHaystack: nullptr, re, from, rmatch);
10138}
10139
10140bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10141{
10142 if (!re.isValid()) {
10143 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::contains");
10144 return false;
10145 }
10146 QRegularExpressionMatch m = stringHaystack
10147 ? re.match(subject: *stringHaystack)
10148 : re.matchView(subjectView: viewHaystack);
10149 bool hasMatch = m.hasMatch();
10150 if (hasMatch && rmatch)
10151 *rmatch = std::move(m);
10152 return hasMatch;
10153}
10154
10155bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10156{
10157 return contains(viewHaystack: haystack, stringHaystack: nullptr, re, rmatch);
10158}
10159
10160qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10161{
10162 if (!re.isValid()) {
10163 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::count");
10164 return 0;
10165 }
10166 qsizetype count = 0;
10167 qsizetype index = -1;
10168 qsizetype len = haystack.size();
10169 while (index <= len - 1) {
10170 QRegularExpressionMatch match = re.matchView(subjectView: haystack, offset: index + 1);
10171 if (!match.hasMatch())
10172 break;
10173 count++;
10174
10175 // Search again, from the next character after the beginning of this
10176 // capture. If the capture starts with a surrogate pair, both together
10177 // count as "one character".
10178 index = match.capturedStart();
10179 if (index < len && haystack[index].isHighSurrogate())
10180 ++index;
10181 }
10182 return count;
10183}
10184
10185#endif // QT_CONFIG(regularexpression)
10186
10187/*!
10188 \since 5.0
10189
10190 Converts a plain text string to an HTML string with
10191 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10192 entities.
10193
10194 Example:
10195
10196 \snippet code/src_corelib_text_qstring.cpp 7
10197*/
10198QString QString::toHtmlEscaped() const
10199{
10200 const auto pos = std::u16string_view(*this).find_first_of(str: u"<>&\"");
10201 if (pos == std::u16string_view::npos)
10202 return *this;
10203 QString rich;
10204 const qsizetype len = size();
10205 rich.reserve(asize: qsizetype(len * 1.1));
10206 rich += qToStringViewIgnoringNull(s: *this).first(n: pos);
10207 for (auto ch : qToStringViewIgnoringNull(s: *this).sliced(pos)) {
10208 if (ch == u'<')
10209 rich += "&lt;"_L1;
10210 else if (ch == u'>')
10211 rich += "&gt;"_L1;
10212 else if (ch == u'&')
10213 rich += "&amp;"_L1;
10214 else if (ch == u'"')
10215 rich += "&quot;"_L1;
10216 else
10217 rich += ch;
10218 }
10219 rich.squeeze();
10220 return rich;
10221}
10222
10223/*!
10224 \macro QStringLiteral(str)
10225 \relates QString
10226
10227 The macro generates the data for a QString out of the string literal \a str
10228 at compile time. Creating a QString from it is free in this case, and the
10229 generated string data is stored in the read-only segment of the compiled
10230 object file.
10231
10232 If you have code that looks like this:
10233
10234 \snippet code/src_corelib_text_qstring.cpp 9
10235
10236 then a temporary QString will be created to be passed as the \c{hasAttribute}
10237 function parameter. This can be quite expensive, as it involves a memory
10238 allocation and the copy/conversion of the data into QString's internal
10239 encoding.
10240
10241 This cost can be avoided by using QStringLiteral instead:
10242
10243 \snippet code/src_corelib_text_qstring.cpp 10
10244
10245 In this case, QString's internal data will be generated at compile time; no
10246 conversion or allocation will occur at runtime.
10247
10248 Using QStringLiteral instead of a double quoted plain C++ string literal can
10249 significantly speed up creation of QString instances from data known at
10250 compile time.
10251
10252 \note QLatin1StringView can still be more efficient than QStringLiteral
10253 when the string is passed to a function that has an overload taking
10254 QLatin1StringView and this overload avoids conversion to QString. For
10255 instance, QString::operator==() can compare to a QLatin1StringView
10256 directly:
10257
10258 \snippet code/src_corelib_text_qstring.cpp 11
10259
10260 \note Some compilers have bugs encoding strings containing characters outside
10261 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10262 those cases. It is optional otherwise.
10263
10264 \sa QByteArrayLiteral
10265*/
10266
10267#if QT_DEPRECATED_SINCE(6, 8)
10268/*!
10269 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10270
10271 \relates QString
10272 \since 6.2
10273 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10274
10275 Literal operator that creates a QString out of the first \a size characters in
10276 the char16_t string literal \a str.
10277
10278 The QString is created at compile time, and the generated string data is stored
10279 in the read-only segment of the compiled object file. Duplicate literals may
10280 share the same read-only memory. This functionality is interchangeable with
10281 QStringLiteral, but saves typing when many string literals are present in the
10282 code.
10283
10284 The following code creates a QString:
10285 \code
10286 auto str = u"hello"_qs;
10287 \endcode
10288
10289 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10290*/
10291#endif // QT_DEPRECATED_SINCE(6, 8)
10292
10293/*!
10294 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10295
10296 \relates QString
10297 \since 6.4
10298
10299 Literal operator that creates a QString out of the first \a size characters in
10300 the char16_t string literal \a str.
10301
10302 The QString is created at compile time, and the generated string data is stored
10303 in the read-only segment of the compiled object file. Duplicate literals may
10304 share the same read-only memory. This functionality is interchangeable with
10305 QStringLiteral, but saves typing when many string literals are present in the
10306 code.
10307
10308 The following code creates a QString:
10309 \code
10310 using namespace Qt::Literals::StringLiterals;
10311
10312 auto str = u"hello"_s;
10313 \endcode
10314
10315 \sa Qt::Literals::StringLiterals
10316*/
10317
10318/*!
10319 \internal
10320 */
10321void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10322{
10323 qt_from_latin1(dst: reinterpret_cast<char16_t *>(out), str: in.data(), size: size_t(in.size()));
10324}
10325
10326/*!
10327 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10328 \relates QString
10329 \since 6.1
10330
10331 Removes all elements that compare equal to \a t from the
10332 string \a s. Returns the number of elements removed, if any.
10333
10334 \sa erase_if
10335*/
10336
10337/*!
10338 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10339 \relates QString
10340 \since 6.1
10341
10342 Removes all elements for which the predicate \a pred returns true
10343 from the string \a s. Returns the number of elements removed, if
10344 any.
10345
10346 \sa erase
10347*/
10348
10349/*!
10350 \macro const char *qPrintable(const QString &str)
10351 \relates QString
10352
10353 Returns \a str as a \c{const char *}. This is equivalent to
10354 \a{str}.toLocal8Bit().constData().
10355
10356 The char pointer will be invalid after the statement in which
10357 qPrintable() is used. This is because the array returned by
10358 QString::toLocal8Bit() will fall out of scope.
10359
10360 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10361 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10362 local 8-bit encoding. Therefore qUtf8Printable() should be used
10363 for logging strings instead of qPrintable().
10364
10365 \sa qUtf8Printable()
10366*/
10367
10368/*!
10369 \macro const char *qUtf8Printable(const QString &str)
10370 \relates QString
10371 \since 5.4
10372
10373 Returns \a str as a \c{const char *}. This is equivalent to
10374 \a{str}.toUtf8().constData().
10375
10376 The char pointer will be invalid after the statement in which
10377 qUtf8Printable() is used. This is because the array returned by
10378 QString::toUtf8() will fall out of scope.
10379
10380 Example:
10381
10382 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10383
10384 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10385*/
10386
10387/*!
10388 \macro const wchar_t *qUtf16Printable(const QString &str)
10389 \relates QString
10390 \since 5.7
10391
10392 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10393 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10394
10395 The only useful thing you can do with the return value of this macro is to
10396 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10397 the return value is \e{not} a valid \c{const wchar_t*}!
10398
10399 In general, the pointer will be invalid after the statement in which
10400 qUtf16Printable() is used. This is because the pointer may have been
10401 obtained from a temporary expression, which will fall out of scope.
10402
10403 Example:
10404
10405 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10406
10407 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10408*/
10409
10410QT_END_NAMESPACE
10411
10412#undef REHASH
10413

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of qtbase/src/corelib/text/qstring.cpp