1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5
6#include "qstringlist.h"
7#if QT_CONFIG(regularexpression)
8#include "qregularexpression.h"
9#endif
10#include "qunicodetables_p.h"
11#include <private/qstringconverter_p.h>
12#include <private/qtools_p.h>
13#include "qlocale_tools_p.h"
14#include "private/qsimd_p.h"
15#include <qnumeric.h>
16#include <qdatastream.h>
17#include <qlist.h>
18#include "qlocale.h"
19#include "qlocale_p.h"
20#include "qspan.h"
21#include "qstringbuilder.h"
22#include "qstringmatcher.h"
23#include "qvarlengtharray.h"
24#include "qdebug.h"
25#include "qendian.h"
26#include "qcollator.h"
27#include "qttypetraits.h"
28
29#ifdef Q_OS_DARWIN
30#include <private/qcore_mac_p.h>
31#endif
32
33#include <private/qfunctions_p.h>
34
35#include <limits.h>
36#include <string.h>
37#include <stdlib.h>
38#include <stdio.h>
39#include <stdarg.h>
40#include <wchar.h>
41
42#include "qchar.cpp"
43#include "qlatin1stringmatcher.h"
44#include "qstringmatcher.cpp"
45#include "qstringiterator_p.h"
46#include "qstringalgorithms_p.h"
47#include "qthreadstorage.h"
48
49#include <algorithm>
50#include <functional>
51
52#ifdef Q_OS_WIN
53# include <qt_windows.h>
54# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
55// MSVC requires this, but let's apply it to MinGW compilers too, just in case
56# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, " \
57 "otherwise some QString functions will not get exported."
58# endif
59#endif
60
61#ifdef truncate
62# undef truncate
63#endif
64
// Rolling-hash update step used by the backwards substring search in
// qLastIndexOf() below. It expects two variables to be in scope where it is
// expanded: `sl_minus_1` (needle length minus one) and `hashHaystack` (the
// running hash of the current window). It removes the contribution of code
// unit `a`, weighted by 2^sl_minus_1, and then doubles the hash. The
// subtraction is skipped when the shift count would reach the bit width of
// `sl_minus_1`.
// Note: this expands to an unbraced `if` followed by a second statement and
// has no trailing semicolon, so only use it as a full statement inside a
// braced block.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
69
70QT_BEGIN_NAMESPACE
71
72using namespace Qt::StringLiterals;
73using namespace QtMiscUtils;
74
// Out-of-line definition of the static member QString::_empty (a single
// char16_t with value 0).
const char16_t QString::_empty = 0;

// Forward declaration; the definition lives in qstringmatcher.cpp.
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
79
80namespace {
// Selects what a comparison routine has to compute. In this file,
// ucstrncmp_sse2() uses it as a template parameter: in
// CompareStringsForEquality mode it returns 1 for any mismatch instead of
// computing the difference between the mismatching code units.
enum StringComparisonMode {
    CompareStringsForEquality,
    CompareStringsForOrdering
};
85
86template <typename Pointer>
87char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
88
89template <>
90char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
91{
92 return foldCase(ch: reinterpret_cast<const char16_t*>(ch),
93 start: reinterpret_cast<const char16_t*>(start));
94}
95
96template <>
97char32_t foldCaseHelper<const char*>(const char* ch, const char*)
98{
99 return foldCase(ch: char16_t(uchar(*ch)));
100}
101
102template <typename T>
103char16_t valueTypeToUtf16(T t) = delete;
104
105template <>
106char16_t valueTypeToUtf16<QChar>(QChar t)
107{
108 return t.unicode();
109}
110
111template <>
112char16_t valueTypeToUtf16<char>(char t)
113{
114 return char16_t{uchar(t)};
115}
116
// Returns true if the case-folded form of \a a compares equal to \a b.
// Only \a a is folded; \a b is compared as given.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
122
123/*!
124 \internal
125
126 Returns the index position of the first occurrence of the
127 character \a ch in the string given by \a str and \a len,
128 searching forward from index
129 position \a from. Returns -1 if \a ch could not be found.
130*/
131template <typename Haystack>
132static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
133 qsizetype from, Qt::CaseSensitivity cs) noexcept
134{
135 if (haystack.size() == 0)
136 return -1;
137 if (from < 0)
138 from += haystack.size();
139 else if (std::size_t(from) > std::size_t(haystack.size()))
140 from = haystack.size() - 1;
141 if (from >= 0) {
142 char16_t c = needle.unicode();
143 const auto b = haystack.data();
144 auto n = b + from;
145 if (cs == Qt::CaseSensitive) {
146 for (; n >= b; --n)
147 if (valueTypeToUtf16(*n) == c)
148 return n - b;
149 } else {
150 c = foldCase(ch: c);
151 for (; n >= b; --n)
152 if (foldCase(valueTypeToUtf16(*n)) == c)
153 return n - b;
154 }
155 }
156 return -1;
157}
158template <> qsizetype
159qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
160
/*
    Returns the index of the last occurrence of needle0 in haystack0 that
    starts at or before position from, or -1 if there is none. A negative
    from is counted from the end of the haystack.

    Needles of length 1 are delegated to the QChar overload. For all other
    lengths, a hash of the needle is compared against a rolling hash of the
    current haystack window while the window moves toward the beginning of the
    haystack; QtPrivate::compareStrings() confirms every hash match.
*/
template<typename Haystack, typename Needle>
static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
                              Needle needle0, Qt::CaseSensitivity cs) noexcept
{
    const qsizetype sl = needle0.size();
    if (sl == 1)
        return qLastIndexOf(haystack0, needle0.front(), from, cs);

    const qsizetype l = haystack0.size();
    if (from < 0)
        from += l;
    // an empty needle is found at the very end of the haystack
    if (from == l && sl == 0)
        return from;
    const qsizetype delta = l - sl;
    // the unsigned comparison also rejects a from that is still negative;
    // delta < 0 means that the needle is longer than the haystack
    if (std::size_t(from) > std::size_t(l) || delta < 0)
        return -1;
    // a match cannot start later than delta
    if (from > delta)
        from = delta;

    // wraps the sl elements starting at v in a Haystack for comparison
    auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };

    auto haystack = haystack0.data();
    const auto needle = needle0.data();
    const auto *end = haystack; // first element: the backward scan stops here
    haystack += from;
    const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
    const auto *n = needle + sl_minus_1;   // last element of the needle
    const auto *h = haystack + sl_minus_1; // last element of the first window
    qregisteruint hashNeedle = 0, hashHaystack = 0;

    if (cs == Qt::CaseSensitive) {
        // hash the needle and the first window, walking from their last
        // element to their first
        for (qsizetype idx = 0; idx < sl; ++idx) {
            hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
            hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
        }
        // the loop below adds the window's first element on every iteration,
        // so take it out once here
        hashHaystack -= valueTypeToUtf16(*haystack);

        while (haystack >= end) {
            hashHaystack += valueTypeToUtf16(*haystack);
            if (hashHaystack == hashNeedle
                 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
                return haystack - end;
            // move the window one element toward the beginning and drop the
            // element that has just left it at the far end
            --haystack;
            REHASH(valueTypeToUtf16(haystack[sl]));
        }
    } else {
        // same algorithm, operating on case-folded values
        for (qsizetype idx = 0; idx < sl; ++idx) {
            hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
            hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
        }
        hashHaystack -= foldCaseHelper(haystack, end);

        while (haystack >= end) {
            hashHaystack += foldCaseHelper(haystack, end);
            if (hashHaystack == hashNeedle
                 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
                return haystack - end;
            --haystack;
            REHASH(foldCaseHelper(haystack + sl, end));
        }
    }
    return -1;
}
224
225template <typename Haystack, typename Needle>
226bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
227{
228 if (haystack.isNull())
229 return needle.isNull();
230 const auto haystackLen = haystack.size();
231 const auto needleLen = needle.size();
232 if (haystackLen == 0)
233 return needleLen == 0;
234 if (needleLen > haystackLen)
235 return false;
236
237 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
238}
239
240template <typename Haystack, typename Needle>
241bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
242{
243 if (haystack.isNull())
244 return needle.isNull();
245 const auto haystackLen = haystack.size();
246 const auto needleLen = needle.size();
247 if (haystackLen == 0)
248 return needleLen == 0;
249 if (haystackLen < needleLen)
250 return false;
251
252 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
253}
254
255template <typename T>
256static void append_helper(QString &self, T view)
257{
258 const auto strData = view.data();
259 const qsizetype strSize = view.size();
260 auto &d = self.data_ptr();
261 if (strData && strSize > 0) {
262 // the number of UTF-8 code units is always at a minimum equal to the number
263 // of equivalent UTF-16 code units
264 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: strSize, data: nullptr, old: nullptr);
265 Q_CHECK_PTR(d.data());
266 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
267
268 auto dst = std::next(x: d.data(), n: d.size);
269 if constexpr (std::is_same_v<T, QUtf8StringView>) {
270 dst = QUtf8::convertToUnicode(dst, view);
271 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
272 QLatin1::convertToUnicode(dst, view);
273 dst += strSize;
274 } else {
275 static_assert(QtPrivate::type_dependent_false<T>(),
276 "Can only operate on UTF-8 and Latin-1");
277 }
278 self.resize(size: std::distance(first: d.begin(), last: dst));
279 } else if (d.isNull() && !view.isNull()) { // special case
280 self = QLatin1StringView("");
281 }
282}
283
// Compile-time unrolled loop of at most MaxCount iterations, implemented by
// recursing into UnrollTailLoop<MaxCount - 1>. The recursion ends at the
// UnrollTailLoop<0> specialization defined after the struct.
template <uint MaxCount> struct UnrollTailLoop
{
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        /* equivalent to:
         *   for ( ; count--; ++i) {
         *       if (loopCheck(i))
         *           return returnIfFailed(i);
         *   }
         *   return returnIfExited;
         */

        // no iterations left: the loop ran to completion
        if (!count)
            return returnIfExited;

        // a positive check ends the loop early at index i
        if (loopCheck(i))
            return returnIfFailed(i);

        // otherwise continue with the next index, one level further down
        using Next = UnrollTailLoop<MaxCount - 1>;
        return Next::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
    }

    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        /* equivalent to:
         *   for (Number i = 0; i < count; ++i)
         *       code(i);
         */

        // reuse the checking overload with a check that never ends the loop
        exec(count, 0,
             [=](Number i) -> bool { code(i); return false; },
             [](Number) { return 0; });
    }
};

// End of the recursion: no iterations are left to unroll.
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
322} // unnamed namespace
323
324/*
325 * Note on the use of SIMD in qstring.cpp:
326 *
327 * Several operations with strings are improved with the use of SIMD code,
328 * since they are repetitive. For MIPS, we have hand-written assembly code
329 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
330 * x86, we can only use intrinsics and therefore everything is contained in
331 * qstring.cpp. We need to use intrinsics only for those platforms due to the
332 * different compilers and toolchains used, which have different syntax for
333 * assembly sources.
334 *
335 * ** SSE notes: **
336 *
337 * Whenever multiple alternatives are equivalent or near so, we prefer the one
338 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
339 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
340 * SSE versions should be done when there is a clear performance benefit and
341 * requires fallback code to SSE2, if it exists.
342 *
343 * Performance measurement in the past shows that most strings are short in
344 * size and, therefore, do not benefit from alignment prologues. That is,
345 * trying to find a 16-byte-aligned boundary to operate on is often more
346 * expensive than executing the unaligned operation directly. In addition, note
347 * that the QString private data is designed so that the data is stored on
348 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
349 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
350 * 50% of the time), so skipping the alignment prologue is actually optimizing
351 * for the common case.
352 */
353
354#if defined(__mips_dsp)
355// From qstring_mips_dsp_asm.S
356extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
357extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
358extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
359#endif
360
361#if defined(__SSE2__) && defined(Q_CC_GNU)
362// We may overrun the buffer, but that's a false positive:
363// this won't crash nor produce incorrect results
364# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
365#else
366# define ATTRIBUTE_NO_SANITIZE
367#endif
368
369#ifdef __SSE2__
// Compile-time switches derived from qCompilerCpuFeatures. They select the
// SSE4.1 and AVX2 code paths in the `if constexpr` branches of the SIMD
// helpers in this file.
static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
// AVX2 paths additionally require every bit of CpuFeatureArchHaswell.
static constexpr bool UseAvx2 = UseSse4_1 &&
        (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
373
374[[maybe_unused]]
375static Q_ALWAYS_INLINE __m128i mm_load8_zero_extend(const void *ptr)
376{
377 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
378 if constexpr (UseSse4_1) {
379 // use a MOVQ followed by PMOVZXBW
380 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
381 __m128i data = _mm_loadl_epi64(p: dataptr);
382 return _mm_cvtepu8_epi16(V: data);
383 }
384
385 // use MOVQ followed by PUNPCKLBW
386 __m128i data = _mm_loadl_epi64(p: dataptr);
387 return _mm_unpacklo_epi8(a: data, b: _mm_setzero_si128());
388}
389
390[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
391static qsizetype qustrlen_sse2(const char16_t *str) noexcept
392{
393 // find the 16-byte alignment immediately prior or equal to str
394 quintptr misalignment = quintptr(str) & 0xf;
395 Q_ASSERT((misalignment & 1) == 0);
396 const char16_t *ptr = str - (misalignment / 2);
397
398 // load 16 bytes and see if we have a null
399 // (aligned loads can never segfault)
400 const __m128i zeroes = _mm_setzero_si128();
401 __m128i data = _mm_load_si128(p: reinterpret_cast<const __m128i *>(ptr));
402 __m128i comparison = _mm_cmpeq_epi16(a: data, b: zeroes);
403 uint mask = _mm_movemask_epi8(a: comparison);
404
405 // ignore the result prior to the beginning of str
406 mask >>= misalignment;
407
408 // Have we found something in the first block? Need to handle it now
409 // because of the left shift above.
410 if (mask)
411 return qCountTrailingZeroBits(v: mask) / sizeof(char16_t);
412
413 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
414 qsizetype size = Step - misalignment / sizeof(char16_t);
415
416 size -= Step;
417 do {
418 size += Step;
419 data = _mm_load_si128(p: reinterpret_cast<const __m128i *>(str + size));
420
421 comparison = _mm_cmpeq_epi16(a: data, b: zeroes);
422 mask = _mm_movemask_epi8(a: comparison);
423 } while (mask == 0);
424
425 // found a null
426 return size + qCountTrailingZeroBits(v: mask) / sizeof(char16_t);
427}
428
429// Scans from \a ptr to \a end until \a maskval is non-zero. Returns true if
430// the no non-zero was found. Returns false and updates \a ptr to point to the
431// first 16-bit word that has any bit set (note: if the input is 8-bit, \a ptr
432// may be updated to one byte short).
433static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
434{
435 auto updatePtr = [&](uint result) {
436 // found a character matching the mask
437 uint idx = qCountTrailingZeroBits(v: ~result);
438 ptr += idx;
439 return false;
440 };
441
442 if constexpr (UseSse4_1) {
443# ifndef Q_OS_QNX // compiler fails in the code below
444 __m128i mask;
445 auto updatePtrSimd = [&](__m128i data) -> bool {
446 __m128i masked = _mm_and_si128(a: mask, b: data);
447 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
448 uint result = _mm_movemask_epi8(a: comparison);
449 return updatePtr(result);
450 };
451
452 if constexpr (UseAvx2) {
453 // AVX2 implementation: test 32 bytes at a time
454 const __m256i mask256 = _mm256_broadcastd_epi32(X: _mm_cvtsi32_si128(a: maskval));
455 while (ptr + 32 <= end) {
456 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(ptr));
457 if (!_mm256_testz_si256(a: mask256, b: data)) {
458 // found a character matching the mask
459 __m256i masked256 = _mm256_and_si256(a: mask256, b: data);
460 __m256i comparison256 = _mm256_cmpeq_epi16(a: masked256, b: _mm256_setzero_si256());
461 return updatePtr(_mm256_movemask_epi8(a: comparison256));
462 }
463 ptr += 32;
464 }
465
466 mask = _mm256_castsi256_si128(a: mask256);
467 } else {
468 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
469 // comparisons, unrolled)
470 mask = _mm_set1_epi32(i: maskval);
471 while (ptr + 32 <= end) {
472 __m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
473 __m128i data2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr + 16));
474 if (!_mm_testz_si128(M: mask, V: data1))
475 return updatePtrSimd(data1);
476
477 ptr += 16;
478 if (!_mm_testz_si128(M: mask, V: data2))
479 return updatePtrSimd(data2);
480 ptr += 16;
481 }
482 }
483
484 // AVX2 and SSE4.1: final 16-byte comparison
485 if (ptr + 16 <= end) {
486 __m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
487 if (!_mm_testz_si128(M: mask, V: data1))
488 return updatePtrSimd(data1);
489 ptr += 16;
490 }
491
492 // and final 8-byte comparison
493 if (ptr + 8 <= end) {
494 __m128i data1 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
495 if (!_mm_testz_si128(M: mask, V: data1))
496 return updatePtrSimd(data1);
497 ptr += 8;
498 }
499
500 return true;
501# endif // QNX
502 }
503
504 // SSE2 implementation: test 16 bytes at a time.
505 const __m128i mask = _mm_set1_epi32(i: maskval);
506 while (ptr + 16 <= end) {
507 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
508 __m128i masked = _mm_and_si128(a: mask, b: data);
509 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
510 quint16 result = _mm_movemask_epi8(a: comparison);
511 if (result != 0xffff)
512 return updatePtr(result);
513 ptr += 16;
514 }
515
516 // and one 8-byte comparison
517 if (ptr + 8 <= end) {
518 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
519 __m128i masked = _mm_and_si128(a: mask, b: data);
520 __m128i comparison = _mm_cmpeq_epi16(a: masked, b: _mm_setzero_si128());
521 quint8 result = _mm_movemask_epi8(a: comparison);
522 if (result != 0xff)
523 return updatePtr(result);
524 ptr += 8;
525 }
526
527 return true;
528}
529
530template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
531static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
532{
533 static_assert(std::is_unsigned_v<Char>);
534
535 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
536 // we compare. This lambda helps extract the code unit.
537 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
538 constexpr int Stride = 2;
539 // this is the same as:
540 // return n[idx / Stride];
541 // but using pointer arithmetic to avoid the compiler dividing by two
542 // and multiplying by two in the case of char16_t (we know idx is even,
543 // but the compiler does not). This is not UB.
544
545 auto ptr = reinterpret_cast<const uchar *>(n);
546 ptr += idx / (Stride / sizeof(*n));
547 return *reinterpret_cast<decltype(n)>(ptr);
548 };
549 auto difference = [a, b](uint mask, qptrdiff offset) {
550 if (Mode == CompareStringsForEquality)
551 return 1;
552 uint idx = qCountTrailingZeroBits(v: mask);
553 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
554 };
555
556 static const auto load8Chars = [](const auto *ptr) {
557 if (sizeof(*ptr) == 2)
558 return _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
559 __m128i chunk = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
560 return _mm_unpacklo_epi8(a: chunk, b: _mm_setzero_si128());
561 };
562 static const auto load4Chars = [](const auto *ptr) {
563 if (sizeof(*ptr) == 2)
564 return _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
565 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
566 return _mm_unpacklo_epi8(a: chunk, b: _mm_setzero_si128());
567 };
568
569 // we're going to read a[0..15] and b[0..15] (32 bytes)
570 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
571 if constexpr (UseAvx2) {
572 __m256i a_data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(a + offset));
573 __m256i b_data;
574 if (sizeof(Char) == 1) {
575 // expand to UTF-16 via zero-extension
576 __m128i chunk = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(b + offset));
577 b_data = _mm256_cvtepu8_epi16(V: chunk);
578 } else {
579 b_data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(b + offset));
580 }
581 __m256i result = _mm256_cmpeq_epi16(a: a_data, b: b_data);
582 return _mm256_movemask_epi8(a: result);
583 }
584
585 __m128i a_data1 = load8Chars(a + offset);
586 __m128i a_data2 = load8Chars(a + offset + 8);
587 __m128i b_data1, b_data2;
588 if (sizeof(Char) == 1) {
589 // expand to UTF-16 via unpacking
590 __m128i b_data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(b + offset));
591 b_data1 = _mm_unpacklo_epi8(a: b_data, b: _mm_setzero_si128());
592 b_data2 = _mm_unpackhi_epi8(a: b_data, b: _mm_setzero_si128());
593 } else {
594 b_data1 = load8Chars(b + offset);
595 b_data2 = load8Chars(b + offset + 8);
596 }
597 __m128i result1 = _mm_cmpeq_epi16(a: a_data1, b: b_data1);
598 __m128i result2 = _mm_cmpeq_epi16(a: a_data2, b: b_data2);
599 return _mm_movemask_epi8(a: result1) | _mm_movemask_epi8(a: result2) << 16;
600 };
601
602 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
603 qptrdiff offset = 0;
604 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
605 uint mask = ~processChunk16Chars(offset);
606 if (mask)
607 return difference(mask, offset);
608 }
609
610 // maybe overlap the last 32 bytes
611 if (size_t(offset) < l) {
612 offset = l - sizeof(__m256i) / sizeof(char16_t);
613 uint mask = ~processChunk16Chars(offset);
614 return mask ? difference(mask, offset) : 0;
615 }
616 } else if (l >= 4) {
617 __m128i a_data1, b_data1;
618 __m128i a_data2, b_data2;
619 int width;
620 if (l >= 8) {
621 width = 8;
622 a_data1 = load8Chars(a);
623 b_data1 = load8Chars(b);
624 a_data2 = load8Chars(a + l - width);
625 b_data2 = load8Chars(b + l - width);
626 } else {
627 // we're going to read a[0..3] and b[0..3] (8 bytes)
628 width = 4;
629 a_data1 = load4Chars(a);
630 b_data1 = load4Chars(b);
631 a_data2 = load4Chars(a + l - width);
632 b_data2 = load4Chars(b + l - width);
633 }
634
635 __m128i result = _mm_cmpeq_epi16(a: a_data1, b: b_data1);
636 ushort mask = ~_mm_movemask_epi8(a: result);
637 if (mask)
638 return difference(mask, 0);
639
640 result = _mm_cmpeq_epi16(a: a_data2, b: b_data2);
641 mask = ~_mm_movemask_epi8(a: result);
642 if (mask)
643 return difference(mask, l - width);
644 } else {
645 // reset l
646 l &= 3;
647
648 const auto lambda = [=](size_t i) -> int {
649 return a[i] - b[i];
650 };
651 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
652 }
653 return 0;
654}
655#endif
656
657Q_NEVER_INLINE
658qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
659{
660#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
661 return qustrlen_sse2(str);
662#endif
663
664 if (sizeof(wchar_t) == sizeof(char16_t))
665 return wcslen(s: reinterpret_cast<const wchar_t *>(str));
666
667 qsizetype result = 0;
668 while (*str++)
669 ++result;
670 return result;
671}
672
673qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
674{
675 return qustrchr(str: { str, maxlen }, ch: u'\0') - str;
676}
677
678/*!
679 * \internal
680 *
681 * Searches for character \a c in the string \a str and returns a pointer to
682 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
683 * character is not found, this function returns a pointer to the end of the
684 * string -- that is, \c{str.end()}.
685 */
686Q_NEVER_INLINE
687const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
688{
689 const char16_t *n = str.utf16();
690 const char16_t *e = n + str.size();
691
692#ifdef __SSE2__
693 bool loops = true;
694 // Using the PMOVMSKB instruction, we get two bits for each character
695 // we compare.
696 __m128i mch;
697 if constexpr (UseAvx2) {
698 // we're going to read n[0..15] (32 bytes)
699 __m256i mch256 = _mm256_set1_epi32(i: c | (c << 16));
700 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
701 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(n));
702 __m256i result = _mm256_cmpeq_epi16(a: data, b: mch256);
703 uint mask = uint(_mm256_movemask_epi8(a: result));
704 if (mask) {
705 uint idx = qCountTrailingZeroBits(v: mask);
706 return n + idx / 2;
707 }
708 }
709 loops = false;
710 mch = _mm256_castsi256_si128(a: mch256);
711 } else {
712 mch = _mm_set1_epi32(i: c | (c << 16));
713 }
714
715 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
716 __m128i result = _mm_cmpeq_epi16(a: data, b: mch);
717 uint mask = uint(_mm_movemask_epi8(a: result));
718 if ((mask & validityMask) == 0)
719 return false;
720 uint idx = qCountTrailingZeroBits(v: mask);
721 n += idx / 2;
722 return true;
723 };
724
725 // we're going to read n[0..7] (16 bytes)
726 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
727 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(n));
728 if (hasMatch(data, 0xffff))
729 return n;
730
731 if (!loops) {
732 n += 8;
733 break;
734 }
735 }
736
737# if !defined(__OPTIMIZE_SIZE__)
738 // we're going to read n[0..3] (8 bytes)
739 if (e - n > 3) {
740 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(n));
741 if (hasMatch(data, 0xff))
742 return n;
743
744 n += 4;
745 }
746
747 return UnrollTailLoop<3>::exec(count: e - n, returnIfExited: e,
748 loopCheck: [=](qsizetype i) { return n[i] == c; },
749 returnIfFailed: [=](qsizetype i) { return n + i; });
750# endif
751#elif defined(__ARM_NEON__)
752 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
753 const uint16x8_t ch_vec = vdupq_n_u16(c);
754 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
755 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
756 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
757 if (ushort(mask)) {
758 // found a match
759 return n + qCountTrailingZeroBits(mask);
760 }
761 }
762#endif // aarch64
763
764 return std::find(first: n, last: e, val: c);
765}
766
767/*!
768 * \internal
769 *
770 * Searches case-insensitively for character \a c in the string \a str and
771 * returns a pointer to it. Iif the character is not found, this function
772 * returns a pointer to the end of the string -- that is, \c{str.end()}.
773 */
774Q_NEVER_INLINE
775const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
776{
777 const QChar *n = str.begin();
778 const QChar *e = str.end();
779 c = foldCase(ch: c);
780 auto it = std::find_if(first: n, last: e, pred: [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
781 return reinterpret_cast<const char16_t *>(it);
782}
783
784// Note: ptr on output may be off by one and point to a preceding US-ASCII
785// character. Usually harmless.
786bool qt_is_ascii(const char *&ptr, const char *end) noexcept
787{
788#if defined(__SSE2__)
789 // Testing for the high bit can be done efficiently with just PMOVMSKB
790 bool loops = true;
791 if constexpr (UseAvx2) {
792 while (ptr + 32 <= end) {
793 __m256i data = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(ptr));
794 quint32 mask = _mm256_movemask_epi8(a: data);
795 if (mask) {
796 uint idx = qCountTrailingZeroBits(v: mask);
797 ptr += idx;
798 return false;
799 }
800 ptr += 32;
801 }
802 loops = false;
803 }
804
805 while (ptr + 16 <= end) {
806 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(ptr));
807 quint32 mask = _mm_movemask_epi8(a: data);
808 if (mask) {
809 uint idx = qCountTrailingZeroBits(v: mask);
810 ptr += idx;
811 return false;
812 }
813 ptr += 16;
814
815 if (!loops)
816 break;
817 }
818 if (ptr + 8 <= end) {
819 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(ptr));
820 quint8 mask = _mm_movemask_epi8(a: data);
821 if (mask) {
822 uint idx = qCountTrailingZeroBits(v: mask);
823 ptr += idx;
824 return false;
825 }
826 ptr += 8;
827 }
828#endif
829
830 while (ptr + 4 <= end) {
831 quint32 data = qFromUnaligned<quint32>(src: ptr);
832 if (data &= 0x80808080U) {
833 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
834 ? qCountLeadingZeroBits(v: data)
835 : qCountTrailingZeroBits(v: data);
836 ptr += idx / 8;
837 return false;
838 }
839 ptr += 4;
840 }
841
842 while (ptr != end) {
843 if (quint8(*ptr) & 0x80)
844 return false;
845 ++ptr;
846 }
847 return true;
848}
849
850bool QtPrivate::isAscii(QLatin1StringView s) noexcept
851{
852 const char *ptr = s.begin();
853 const char *end = s.end();
854
855 return qt_is_ascii(ptr, end);
856}
857
858static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
859{
860#ifdef __SSE2__
861 const char *ptr8 = reinterpret_cast<const char *>(ptr);
862 const char *end8 = reinterpret_cast<const char *>(end);
863 bool ok = simdTestMask(ptr&: ptr8, end: end8, maskval: 0xff80ff80);
864 ptr = reinterpret_cast<const char16_t *>(ptr8);
865 if (!ok)
866 return false;
867#endif
868
869 while (ptr != end) {
870 if (*ptr & 0xff80)
871 return false;
872 ++ptr;
873 }
874 return true;
875}
876
877bool QtPrivate::isAscii(QStringView s) noexcept
878{
879 const char16_t *ptr = s.utf16();
880 const char16_t *end = ptr + s.size();
881
882 return isAscii_helper(ptr, end);
883}
884
885bool QtPrivate::isLatin1(QStringView s) noexcept
886{
887 const char16_t *ptr = s.utf16();
888 const char16_t *end = ptr + s.size();
889
890#ifdef __SSE2__
891 const char *ptr8 = reinterpret_cast<const char *>(ptr);
892 const char *end8 = reinterpret_cast<const char *>(end);
893 if (!simdTestMask(ptr&: ptr8, end: end8, maskval: 0xff00ff00))
894 return false;
895 ptr = reinterpret_cast<const char16_t *>(ptr8);
896#endif
897
898 while (ptr != end) {
899 if (*ptr++ > 0xff)
900 return false;
901 }
902 return true;
903}
904
905bool QtPrivate::isValidUtf16(QStringView s) noexcept
906{
907 constexpr char32_t InvalidCodePoint = UINT_MAX;
908
909 QStringIterator i(s);
910 while (i.hasNext()) {
911 const char32_t c = i.next(invalidAs: InvalidCodePoint);
912 if (c == InvalidCodePoint)
913 return false;
914 }
915
916 return true;
917}
918
919// conversion between Latin 1 and UTF-16
920Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
921{
922 /* SIMD:
923 * Unpacking with SSE has been shown to improve performance on recent CPUs
924 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
925 * itself in exactly the same way as one would do it with intrinsics.
926 */
927#if defined(__SSE2__)
928 // we're going to read str[offset..offset+15] (16 bytes)
929 const __m128i nullMask = _mm_setzero_si128();
930 auto processOneChunk = [=](qptrdiff offset) {
931 const __m128i chunk = _mm_loadu_si128(p: (const __m128i*)(str + offset)); // load
932 if constexpr (UseAvx2) {
933 // zero extend to an YMM register
934 const __m256i extended = _mm256_cvtepu8_epi16(V: chunk);
935
936 // store
937 _mm256_storeu_si256(p: (__m256i*)(dst + offset), a: extended);
938 } else {
939 // unpack the first 8 bytes, padding with zeros
940 const __m128i firstHalf = _mm_unpacklo_epi8(a: chunk, b: nullMask);
941 _mm_storeu_si128(p: (__m128i*)(dst + offset), b: firstHalf); // store
942
943 // unpack the last 8 bytes, padding with zeros
944 const __m128i secondHalf = _mm_unpackhi_epi8 (a: chunk, b: nullMask);
945 _mm_storeu_si128(p: (__m128i*)(dst + offset + 8), b: secondHalf); // store
946 }
947 };
948
949 const char *e = str + size;
950 if (size >= sizeof(__m128i)) {
951 qptrdiff offset = 0;
952 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
953 processOneChunk(offset);
954 if (str + offset < e)
955 processOneChunk(size - sizeof(__m128i));
956 return;
957 }
958
959# if !defined(__OPTIMIZE_SIZE__)
960 if (size >= 4) {
961 // two overlapped loads & stores, of either 64-bit or of 32-bit
962 if (size >= 8) {
963 const __m128i unpacked1 = mm_load8_zero_extend(ptr: str);
964 const __m128i unpacked2 = mm_load8_zero_extend(ptr: str + size - 8);
965 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst), b: unpacked1);
966 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + size - 8), b: unpacked2);
967 } else {
968 const __m128i chunk1 = _mm_cvtsi32_si128(a: qFromUnaligned<quint32>(src: str));
969 const __m128i chunk2 = _mm_cvtsi32_si128(a: qFromUnaligned<quint32>(src: str + size - 4));
970 const __m128i unpacked1 = _mm_unpacklo_epi8(a: chunk1, b: nullMask);
971 const __m128i unpacked2 = _mm_unpacklo_epi8(a: chunk2, b: nullMask);
972 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst), a: unpacked1);
973 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst + size - 4), a: unpacked2);
974 }
975 return;
976 } else {
977 size = size % 4;
978 return UnrollTailLoop<3>::exec(count: qsizetype(size), code: [=](qsizetype i) { dst[i] = uchar(str[i]); });
979 }
980# endif
981#endif
982#if defined(__mips_dsp)
983 static_assert(sizeof(qsizetype) == sizeof(int),
984 "oops, the assembler implementation needs to be called in a loop");
985 if (size > 20)
986 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
987 else
988 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
989#else
990 while (size--)
991 *dst++ = (uchar)*str++;
992#endif
993}
994
995static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
996{
997 const qsizetype len = str.size();
998 QVarLengthArray<char16_t> arr(len);
999 qt_from_latin1(dst: arr.data(), str: str.data(), size: len);
1000 return arr;
1001}
1002
1003template <bool Checked>
1004static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1005{
1006#if defined(__SSE2__)
1007 auto questionMark256 = []() {
1008 if constexpr (UseAvx2)
1009 return _mm256_broadcastw_epi16(X: _mm_cvtsi32_si128(a: '?'));
1010 else
1011 return 0;
1012 }();
1013 auto outOfRange256 = []() {
1014 if constexpr (UseAvx2)
1015 return _mm256_broadcastw_epi16(X: _mm_cvtsi32_si128(a: 0x100));
1016 else
1017 return 0;
1018 }();
1019 __m128i questionMark, outOfRange;
1020 if constexpr (UseAvx2) {
1021 questionMark = _mm256_castsi256_si128(questionMark256);
1022 outOfRange = _mm256_castsi256_si128(outOfRange256);
1023 } else {
1024 questionMark = _mm_set1_epi16(w: '?');
1025 outOfRange = _mm_set1_epi16(w: 0x100);
1026 }
1027
1028 auto mergeQuestionMarks = [=](__m128i chunk) {
1029 if (!Checked)
1030 return chunk;
1031
1032 // SSE has no compare instruction for unsigned comparison.
1033 if constexpr (UseSse4_1) {
1034 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1035 chunk = _mm_min_epu16(V1: chunk, V2: outOfRange);
1036 const __m128i offLimitMask = _mm_cmpeq_epi16(a: chunk, b: outOfRange);
1037 chunk = _mm_blendv_epi8(V1: chunk, V2: questionMark, M: offLimitMask);
1038 return chunk;
1039 }
1040 // The variables must be shiffted + 0x8000 to be compared
1041 const __m128i signedBitOffset = _mm_set1_epi16(w: short(0x8000));
1042 const __m128i thresholdMask = _mm_set1_epi16(w: short(0xff + 0x8000));
1043
1044 const __m128i signedChunk = _mm_add_epi16(a: chunk, b: signedBitOffset);
1045 const __m128i offLimitMask = _mm_cmpgt_epi16(a: signedChunk, b: thresholdMask);
1046
1047 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1048 // the 16 bits that were correct contains zeros
1049 const __m128i offLimitQuestionMark = _mm_and_si128(a: offLimitMask, b: questionMark);
1050
1051 // correctBytes contains the bytes that were in limit
1052 // the 16 bits that were off limits contains zeros
1053 const __m128i correctBytes = _mm_andnot_si128(a: offLimitMask, b: chunk);
1054
1055 // merge offLimitQuestionMark and correctBytes to have the result
1056 chunk = _mm_or_si128(a: correctBytes, b: offLimitQuestionMark);
1057
1058 Q_UNUSED(outOfRange);
1059 return chunk;
1060 };
1061
1062 // we're going to read to src[offset..offset+15] (16 bytes)
1063 auto loadChunkAt = [=](qptrdiff offset) {
1064 __m128i chunk1, chunk2;
1065 if constexpr (UseAvx2) {
1066 __m256i chunk = _mm256_loadu_si256(p: reinterpret_cast<const __m256i *>(src + offset));
1067 if (Checked) {
1068 // See mergeQuestionMarks lambda above for details
1069 chunk = _mm256_min_epu16(chunk, outOfRange256);
1070 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1071 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1072 }
1073
1074 chunk2 = _mm256_extracti128_si256(chunk, 1);
1075 chunk1 = _mm256_castsi256_si128(a: chunk);
1076 } else {
1077 chunk1 = _mm_loadu_si128(p: (const __m128i*)(src + offset)); // load
1078 chunk1 = mergeQuestionMarks(chunk1);
1079
1080 chunk2 = _mm_loadu_si128(p: (const __m128i*)(src + offset + 8)); // load
1081 chunk2 = mergeQuestionMarks(chunk2);
1082 }
1083
1084 // pack the two vector to 16 x 8bits elements
1085 return _mm_packus_epi16(a: chunk1, b: chunk2);
1086 };
1087
1088 if (size_t(length) >= sizeof(__m128i)) {
1089 // because of possible overlapping, we won't process the last chunk in the loop
1090 qptrdiff offset = 0;
1091 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1092 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1093
1094 // overlapped conversion of the last full chunk and the tail
1095 __m128i last1 = loadChunkAt(offset);
1096 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1097 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + offset), b: last1);
1098 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), b: last2);
1099 return;
1100 }
1101
1102# if !defined(__OPTIMIZE_SIZE__)
1103 if (length >= 4) {
1104 // this code is fine even for in-place conversion because we load both
1105 // before any store
1106 if (length >= 8) {
1107 __m128i chunk1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src));
1108 __m128i chunk2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(src + length - 8));
1109 chunk1 = mergeQuestionMarks(chunk1);
1110 chunk2 = mergeQuestionMarks(chunk2);
1111
1112 // pack, where the upper half is ignored
1113 const __m128i result1 = _mm_packus_epi16(a: chunk1, b: chunk1);
1114 const __m128i result2 = _mm_packus_epi16(a: chunk2, b: chunk2);
1115 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst), a: result1);
1116 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(dst + length - 8), a: result2);
1117 } else {
1118 __m128i chunk1 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(src));
1119 __m128i chunk2 = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(src + length - 4));
1120 chunk1 = mergeQuestionMarks(chunk1);
1121 chunk2 = mergeQuestionMarks(chunk2);
1122
1123 // pack, we'll zero the upper three quarters
1124 const __m128i result1 = _mm_packus_epi16(a: chunk1, b: chunk1);
1125 const __m128i result2 = _mm_packus_epi16(a: chunk2, b: chunk2);
1126 qToUnaligned(src: _mm_cvtsi128_si32(a: result1), dest: dst);
1127 qToUnaligned(src: _mm_cvtsi128_si32(a: result2), dest: dst + length - 4);
1128 }
1129 return;
1130 }
1131
1132 length = length % 4;
1133 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1134 if (Checked)
1135 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1136 else
1137 dst[i] = src[i];
1138 });
1139# else
1140 length = length % 16;
1141# endif // optimize size
1142#elif defined(__ARM_NEON__)
1143 // Refer to the documentation of the SSE2 implementation.
1144 // This uses exactly the same method as for SSE except:
1145 // 1) neon has unsigned comparison
1146 // 2) packing is done to 64 bits (8 x 8bits component).
1147 if (length >= 16) {
1148 const qsizetype chunkCount = length >> 3; // divided by 8
1149 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1150 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1151 for (qsizetype i = 0; i < chunkCount; ++i) {
1152 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1153 src += 8;
1154
1155 if (Checked) {
1156 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1157 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1158 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1159 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1160 }
1161 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1162 vst1_u8(dst, result); // store
1163 dst += 8;
1164 }
1165 length = length % 8;
1166 }
1167#endif
1168#if defined(__mips_dsp)
1169 static_assert(sizeof(qsizetype) == sizeof(int),
1170 "oops, the assembler implementation needs to be called in a loop");
1171 qt_toLatin1_mips_dsp_asm(dst, src, length);
1172#else
1173 while (length--) {
1174 if (Checked)
1175 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1176 else
1177 *dst++ = *src;
1178 ++src;
1179 }
1180#endif
1181}
1182
// Converts \a length UTF-16 code units at \a src to Latin-1 bytes at \a dst
// via qt_to_latin1_internal<true>, the instantiation in which code units
// above 0xff are replaced by '?'.
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
{
    qt_to_latin1_internal<true>(dst, src, length);
}
1187
// Converts \a length UTF-16 code units at \a src to Latin-1 bytes at \a dst
// via qt_to_latin1_internal<false>, the instantiation that performs no
// range check on the input code units.
void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
{
    qt_to_latin1_internal<false>(dst, src, length);
}
1192
1193// Unicode case-insensitive comparison (argument order matches QStringView)
1194Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1195{
1196 if (a == b)
1197 return qt_lencmp(lhs: alen, rhs: blen);
1198
1199 char32_t alast = 0;
1200 char32_t blast = 0;
1201 qsizetype l = qMin(a: alen, b: blen);
1202 qsizetype i;
1203 for (i = 0; i < l; ++i) {
1204// qDebug() << Qt::hex << alast << blast;
1205// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1206// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1207 int diff = foldCase(ch: a[i], last&: alast) - foldCase(ch: b[i], last&: blast);
1208 if ((diff))
1209 return diff;
1210 }
1211 if (i == alen) {
1212 if (i == blen)
1213 return 0;
1214 return -1;
1215 }
1216 return 1;
1217}
1218
1219// Case-insensitive comparison between a QStringView and a QLatin1StringView
1220// (argument order matches those types)
1221Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1222{
1223 qsizetype l = qMin(a: alen, b: blen);
1224 qsizetype i;
1225 for (i = 0; i < l; ++i) {
1226 int diff = foldCase(ch: a[i]) - foldCase(ch: char16_t{uchar(b[i])});
1227 if ((diff))
1228 return diff;
1229 }
1230 if (i == alen) {
1231 if (i == blen)
1232 return 0;
1233 return -1;
1234 }
1235 return 1;
1236}
1237
1238// Case-insensitive comparison between a Unicode string and a UTF-8 string
1239Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1240{
1241 auto src1 = reinterpret_cast<const uchar *>(utf8);
1242 auto end1 = reinterpret_cast<const uchar *>(utf8end);
1243 QStringIterator src2(utf16, utf16end);
1244
1245 while (src1 < end1 && src2.hasNext()) {
1246 char32_t uc1 = 0;
1247 char32_t *output = &uc1;
1248 uchar b = *src1++;
1249 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst&: output, src&: src1, end: end1);
1250 if (res < 0) {
1251 // decoding error
1252 uc1 = QChar::ReplacementCharacter;
1253 } else {
1254 uc1 = QChar::toCaseFolded(ucs4: uc1);
1255 }
1256
1257 char32_t uc2 = QChar::toCaseFolded(ucs4: src2.next());
1258 int diff = uc1 - uc2; // can't underflow
1259 if (diff)
1260 return diff;
1261 }
1262
1263 // the shorter string sorts first
1264 return (end1 > src1) - int(src2.hasNext());
1265}
1266
1267#if defined(__mips_dsp)
1268// From qstring_mips_dsp_asm.S
1269extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
1270 const char16_t *b,
1271 unsigned len);
1272#endif
1273
1274// Unicode case-sensitive compare two same-sized strings
1275template <StringComparisonMode Mode>
1276static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1277{
1278 // This function isn't memcmp() because that can return the wrong sorting
1279 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1280 // but the bytes in memory are FF 00 and 00 01.
1281
1282#ifndef __OPTIMIZE_SIZE__
1283# if defined(__mips_dsp)
1284 static_assert(sizeof(uint) == sizeof(size_t));
1285 if (l >= 8) {
1286 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1287 }
1288# elif defined(__SSE2__)
1289 return ucstrncmp_sse2<Mode>(a, b, l);
1290# elif defined(__ARM_NEON__)
1291 if (l >= 8) {
1292 const char16_t *end = a + l;
1293 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1294 while (end - a > 7) {
1295 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1296 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1297
1298 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1299 if (r) {
1300 // found a different QChar
1301 if (Mode == CompareStringsForEquality)
1302 return 1;
1303 uint idx = qCountTrailingZeroBits(r);
1304 return a[idx] - b[idx];
1305 }
1306 a += 8;
1307 b += 8;
1308 }
1309 l &= 7;
1310 }
1311 const auto lambda = [=](size_t i) -> int {
1312 return a[i] - b[i];
1313 };
1314 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1315# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1316#endif // __OPTIMIZE_SIZE__
1317
1318 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1319 return memcmp(s1: a, s2: b, n: l * sizeof(char16_t));
1320
1321 for (size_t i = 0; i < l; ++i) {
1322 if (int diff = a[i] - b[i])
1323 return diff;
1324 }
1325 return 0;
1326}
1327
1328template <StringComparisonMode Mode>
1329static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1330{
1331 const uchar *c = reinterpret_cast<const uchar *>(b);
1332 const char16_t *uc = a;
1333 const char16_t *e = uc + l;
1334
1335#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1336 return ucstrncmp_sse2<Mode>(uc, c, l);
1337#endif
1338
1339 while (uc < e) {
1340 int diff = *uc - *c;
1341 if (diff)
1342 return diff;
1343 uc++, c++;
1344 }
1345
1346 return 0;
1347}
1348
1349// Unicode case-sensitive equality
1350template <typename Char2>
1351static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1352{
1353 if constexpr (std::is_same_v<decltype(a), decltype(b)>) {
1354 if (a == b)
1355 return true;
1356 }
1357 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1358}
1359
1360// Unicode case-sensitive comparison
1361template <typename Char2>
1362static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1363{
1364 if constexpr (std::is_same_v<decltype(a), decltype(b)>) {
1365 if (a == b && alen == blen)
1366 return 0;
1367 }
1368 const size_t l = qMin(a: alen, b: blen);
1369 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1370 return cmp ? cmp : qt_lencmp(lhs: alen, rhs: blen);
1371}
1372
1373using CaseInsensitiveL1 = QtPrivate::QCaseInsensitiveLatin1Hash;
1374
1375static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1376{
1377 // We're called with QLatin1StringView's .data() and .size():
1378 Q_ASSERT(lSize >= 0 && rSize >= 0);
1379 if (!lSize)
1380 return rSize ? -1 : 0;
1381 if (!rSize)
1382 return 1;
1383 const qsizetype size = std::min(a: lSize, b: rSize);
1384
1385 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1386 for (qsizetype i = 0; i < size; i++) {
1387 if (int res = CaseInsensitiveL1::difference(lhs: lhsChar[i], rhs: rhsChar[i]))
1388 return res;
1389 }
1390 return qt_lencmp(lhs: lSize, rhs: rSize);
1391}
1392
1393bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1394{
1395 Q_ASSERT(lhs.size() == rhs.size());
1396 return ucstreq(a: lhs.utf16(), alen: lhs.size(), b: rhs.utf16());
1397}
1398
1399bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1400{
1401 Q_ASSERT(lhs.size() == rhs.size());
1402 return ucstreq(a: lhs.utf16(), alen: lhs.size(), b: rhs.latin1());
1403}
1404
1405bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1406{
1407 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1408}
1409
1410bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1411{
1412 Q_ASSERT(lhs.size() == rhs.size());
1413 return (!lhs.size() || memcmp(s1: lhs.data(), s2: rhs.data(), n: lhs.size()) == 0);
1414}
1415
1416bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1417{
1418 return QUtf8::compareUtf8(utf8: lhs, utf16: rhs) == 0;
1419}
1420
1421bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1422{
1423 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1424}
1425
1426bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1427{
1428 return QUtf8::compareUtf8(utf8: QByteArrayView(rhs), s: lhs) == 0;
1429}
1430
1431bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1432{
1433 return QtPrivate::equalStrings(lhs: rhs, rhs: lhs);
1434}
1435
1436bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1437{
1438#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1439 Q_ASSERT(lhs.size() == rhs.size());
1440#else
1441 // operator== didn't enforce size prior to Qt 6.2
1442 if (lhs.size() != rhs.size())
1443 return false;
1444#endif
1445 return (!lhs.size() || memcmp(s1: lhs.data(), s2: rhs.data(), n: lhs.size()) == 0);
1446}
1447
1448bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1449{
1450 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1451 return false;
1452 return lhs.visit(v: [rhs](auto lhs) {
1453 return rhs.visit([lhs](auto rhs) {
1454 return QtPrivate::equalStrings(lhs, rhs);
1455 });
1456 });
1457}
1458
1459/*!
1460 \relates QStringView
1461 \internal
1462 \since 5.10
1463
1464 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1465
1466 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1467
1468 Case-sensitive comparison is based exclusively on the numeric Unicode values
1469 of the characters and is very fast, but is not what a human would expect.
1470 Consider sorting user-visible strings with QString::localeAwareCompare().
1471
1472 \sa {Comparing Strings}
1473*/
1474int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1475{
1476 if (cs == Qt::CaseSensitive)
1477 return ucstrcmp(a: lhs.utf16(), alen: lhs.size(), b: rhs.utf16(), blen: rhs.size());
1478 return ucstricmp(alen: lhs.size(), a: lhs.utf16(), blen: rhs.size(), b: rhs.utf16());
1479}
1480
1481/*!
1482 \relates QStringView
1483 \internal
1484 \since 5.10
1485 \overload
1486
1487 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1488
1489 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1490
1491 Case-sensitive comparison is based exclusively on the numeric Unicode values
1492 of the characters and is very fast, but is not what a human would expect.
1493 Consider sorting user-visible strings with QString::localeAwareCompare().
1494
1495 \sa {Comparing Strings}
1496*/
1497int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1498{
1499 if (cs == Qt::CaseSensitive)
1500 return ucstrcmp(a: lhs.utf16(), alen: lhs.size(), b: rhs.latin1(), blen: rhs.size());
1501 return ucstricmp(alen: lhs.size(), a: lhs.utf16(), blen: rhs.size(), b: rhs.latin1());
1502}
1503
1504/*!
1505 \relates QStringView
1506 \internal
1507 \since 6.0
1508 \overload
1509*/
1510int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1511{
1512 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1513}
1514
1515/*!
1516 \relates QStringView
1517 \internal
1518 \since 5.10
1519 \overload
1520*/
1521int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1522{
1523 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1524}
1525
1526/*!
1527 \relates QStringView
1528 \internal
1529 \since 5.10
1530 \overload
1531
1532 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1533
1534 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1535
1536 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1537 of the characters and is very fast, but is not what a human would expect.
1538 Consider sorting user-visible strings with QString::localeAwareCompare().
1539
1540 \sa {Comparing Strings}
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 if (lhs.isEmpty())
1545 return qt_lencmp(lhs: qsizetype(0), rhs: rhs.size());
1546 if (rhs.isEmpty())
1547 return qt_lencmp(lhs: lhs.size(), rhs: qsizetype(0));
1548 if (cs == Qt::CaseInsensitive)
1549 return latin1nicmp(lhsChar: lhs.data(), lSize: lhs.size(), rhsChar: rhs.data(), rSize: rhs.size());
1550 const auto l = std::min(a: lhs.size(), b: rhs.size());
1551 int r = memcmp(s1: lhs.data(), s2: rhs.data(), n: l);
1552 return r ? r : qt_lencmp(lhs: lhs.size(), rhs: rhs.size());
1553}
1554
1555/*!
1556 \relates QStringView
1557 \internal
1558 \since 6.0
1559 \overload
1560*/
1561int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1562{
1563 return -QUtf8::compareUtf8(utf8: QByteArrayView(rhs), s: lhs, cs);
1564}
1565
1566/*!
1567 \relates QStringView
1568 \internal
1569 \since 6.0
1570 \overload
1571*/
1572int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1573{
1574 if (cs == Qt::CaseSensitive)
1575 return QUtf8::compareUtf8(utf8: lhs, utf16: rhs);
1576 return ucstricmp8(utf8: lhs.begin(), utf8end: lhs.end(), utf16: rhs.begin(), utf16end: rhs.end());
1577}
1578
1579/*!
1580 \relates QStringView
1581 \internal
1582 \since 6.0
1583 \overload
1584*/
1585int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1586{
1587 return -compareStrings(lhs: rhs, rhs: lhs, cs);
1588}
1589
1590/*!
1591 \relates QStringView
1592 \internal
1593 \since 6.0
1594 \overload
1595*/
1596int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1597{
1598 return QUtf8::compareUtf8(lhs: QByteArrayView(lhs), rhs: QByteArrayView(rhs), cs);
1599}
1600
1601int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1602{
1603 return lhs.visit(v: [rhs, cs](auto lhs) {
1604 return rhs.visit([lhs, cs](auto rhs) {
1605 return QtPrivate::compareStrings(lhs, rhs, cs);
1606 });
1607 });
1608}
1609
1610// ### Qt 7: do not allow anything but ASCII digits
1611// in arg()'s replacements.
1612#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1613static bool supportUnicodeDigitValuesInArg()
1614{
1615 static const bool result = []() {
1616 static const char supportUnicodeDigitValuesEnvVar[]
1617 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1618
1619 if (qEnvironmentVariableIsSet(varName: supportUnicodeDigitValuesEnvVar))
1620 return qEnvironmentVariableIntValue(varName: supportUnicodeDigitValuesEnvVar) != 0;
1621
1622#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1623 return true;
1624#else
1625 return false;
1626#endif
1627 }();
1628
1629 return result;
1630}
1631#endif
1632
1633static int qArgDigitValue(QChar ch) noexcept
1634{
1635#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1636 if (supportUnicodeDigitValuesInArg())
1637 return ch.digitValue();
1638#endif
1639 if (ch >= u'0' && ch <= u'9')
1640 return int(ch.unicode() - u'0');
1641 return -1;
1642}
1643
1644#if QT_CONFIG(regularexpression)
1645Q_DECL_COLD_FUNCTION
1646void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where);
1647#endif
1648
1649/*!
1650 \macro QT_RESTRICTED_CAST_FROM_ASCII
1651 \relates QString
1652
1653 Disables most automatic conversions from source literals and 8-bit data
1654 to unicode QStrings, but allows the use of
    the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1656 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1657 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1658 but does not require user code to wrap character and string literals
1659 with QLatin1Char, QLatin1StringView or similar.
1660
1661 Using this macro together with source strings outside the 7-bit range,
1662 non-literals, or literals with embedded NUL characters is undefined.
1663
1664 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1665*/
1666
1667/*!
1668 \macro QT_NO_CAST_FROM_ASCII
1669 \relates QString
1670 \relates QChar
1671
1672 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1673 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1674 \c{unsigned char}) to QChar.
1675
1676 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1677 QT_NO_CAST_FROM_BYTEARRAY
1678*/
1679
1680/*!
1681 \macro QT_NO_CAST_TO_ASCII
1682 \relates QString
1683
1684 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1685
1686 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1687 QT_NO_CAST_FROM_BYTEARRAY
1688*/
1689
1690/*!
1691 \macro QT_ASCII_CAST_WARNINGS
1692 \internal
1693 \relates QString
1694
1695 This macro can be defined to force a warning whenever a function is
1696 called that automatically converts between unicode and 8-bit encodings.
1697
1698 Note: This only works for compilers that support warnings for
1699 deprecated API.
1700
1701 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1702*/
1703
1704/*!
1705 \class QString
1706 \inmodule QtCore
1707 \reentrant
1708
1709 \brief The QString class provides a Unicode character string.
1710
1711 \ingroup tools
1712 \ingroup shared
1713 \ingroup string-processing
1714
1715 \compares strong
1716 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1717 QStringView QUtf8StringView
1718 \endcompareswith
1719 \compareswith strong QByteArray QByteArrayView {const char *}
1720 When comparing with byte arrays, their content is interpreted as utf-8.
1721 \endcompareswith
1722
1723 QString stores a string of 16-bit \l{QChar}s, where each QChar
1724 corresponds to one UTF-16 code unit. (Unicode characters
1725 with code values above 65535 are stored using surrogate pairs,
1726 that is, two consecutive \l{QChar}s.)
1727
1728 \l{Unicode} is an international standard that supports most of the
1729 writing systems in use today. It is a superset of US-ASCII (ANSI
1730 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1731 characters are available at the same code positions.
1732
1733 Behind the scenes, QString uses \l{implicit sharing}
1734 (copy-on-write) to reduce memory usage and to avoid the needless
1735 copying of data. This also helps reduce the inherent overhead of
1736 storing 16-bit characters instead of 8-bit characters.
1737
1738 In addition to QString, Qt also provides the QByteArray class to
1739 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1740 For most purposes, QString is the class you want to use. It is
1741 used throughout the Qt API, and the Unicode support ensures that
1742 your applications are easy to translate if you want to expand
1743 your application's market at some point. Two prominent cases
1744 where QByteArray is appropriate are when you need to store raw
1745 binary data, and when memory conservation is critical (like in
1746 embedded systems).
1747
1748 \section1 Initializing a string
1749
1750 One way to initialize a QString is to pass a \c{const char
1751 *} to its constructor. For example, the following code creates a
1752 QString of size 5 containing the data "Hello":
1753
1754 \snippet qstring/main.cpp 0
1755
1756 QString converts the \c{const char *} data into Unicode using the
1757 fromUtf8() function.
1758
1759 In all of the QString functions that take \c{const char *}
1760 parameters, the \c{const char *} is interpreted as a classic
1761 C-style \c{'\\0'}-terminated string. Except where the function's
1762 name overtly indicates some other encoding, such \c{const char *}
1763 parameters are assumed to be encoded in UTF-8.
1764
1765 You can also provide string data as an array of \l{QChar}s:
1766
1767 \snippet qstring/main.cpp 1
1768
1769 QString makes a deep copy of the QChar data, so you can modify it
1770 later without experiencing side effects. You can avoid taking a
1771 deep copy of the character data by using QStringView or
1772 QString::fromRawData() instead.
1773
1774 Another approach is to set the size of the string using resize()
1775 and to initialize the data character per character. QString uses
1776 0-based indexes, just like C++ arrays. To access the character at
1777 a particular index position, you can use \l operator[](). On
1778 non-\c{const} strings, \l operator[]() returns a reference to a
1779 character that can be used on the left side of an assignment. For
1780 example:
1781
1782 \snippet qstring/main.cpp 2
1783
1784 For read-only access, an alternative syntax is to use the at()
1785 function:
1786
1787 \snippet qstring/main.cpp 3
1788
1789 The at() function can be faster than \l operator[]() because it
1790 never causes a \l{deep copy} to occur. Alternatively, use the
1791 first(), last(), or sliced() functions to extract several characters
1792 at a time.
1793
1794 A QString can embed '\\0' characters (QChar::Null). The size()
1795 function always returns the size of the whole string, including
1796 embedded '\\0' characters.
1797
1798 After a call to the resize() function, newly allocated characters
1799 have undefined values. To set all the characters in the string to
1800 a particular value, use the fill() function.
1801
1802 QString provides dozens of overloads designed to simplify string
1803 usage. For example, if you want to compare a QString with a string
1804 literal, you can write code like this and it will work as expected:
1805
1806 \snippet qstring/main.cpp 4
1807
1808 You can also pass string literals to functions that take QStrings
1809 as arguments, invoking the QString(const char *)
1810 constructor. Similarly, you can pass a QString to a function that
1811 takes a \c{const char *} argument using the \l qPrintable() macro,
1812 which returns the given QString as a \c{const char *}. This is
1813 equivalent to calling <QString>.toLocal8Bit().constData().
1814
1815 \section1 Manipulating string data
1816
1817 QString provides the following basic functions for modifying the
1818 character data: append(), prepend(), insert(), replace(), and
1819 remove(). For example:
1820
1821 \snippet qstring/main.cpp 5
1822
1823 In the above example, the replace() function's first two arguments are the
1824 position from which to start replacing and the number of characters that
1825 should be replaced.
1826
1827 When data-modifying functions increase the size of the string,
1828 QString may reallocate the memory in which it holds its data. When
1829 this happens, QString expands by more than it immediately needs so as
1830 to have space for further expansion without reallocation until the size
1831 of the string has significantly increased.
1832
1833 The insert(), remove(), and, when replacing a sub-string with one of
1834 different size, replace() functions can be slow (\l{linear time}) for
1835 large strings because they require moving many characters in the string
1836 by at least one position in memory.
1837
1838 If you are building a QString gradually and know in advance
1839 approximately how many characters the QString will contain, you
1840 can call reserve(), asking QString to preallocate a certain amount
1841 of memory. You can also call capacity() to find out how much
1842 memory the QString actually has allocated.
1843
1844 QString provides \l{STL-style iterators} (QString::const_iterator and
1845 QString::iterator). In practice, iterators are handy when working with
1846 generic algorithms provided by the C++ standard library.
1847
1848 \note Iterators over a QString, and references to individual characters
1849 within one, cannot be relied on to remain valid when any non-\c{const}
1850 method of the QString is called. Accessing such an iterator or reference
1851 after the call to a non-\c{const} method leads to undefined behavior. When
1852 stability for iterator-like functionality is required, you should use
1853 indexes instead of iterators, as they are not tied to QString's internal
1854 state and thus do not get invalidated.
1855
1856 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1857 function used on a given QString may cause it to internally perform a deep
1858 copy of its data. This invalidates all iterators over the string and
1859 references to individual characters within it. Do not call non-const
1860 functions while keeping iterators. Accessing an iterator or reference
1861 after it has been invalidated leads to undefined behavior. See the
1862 \l{Implicit sharing iterator problem} section for more information.
1863
1864 A frequent requirement is to remove or simplify the spacing between
1865 visible characters in a string. The characters that make up that spacing
1866 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1867 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1868 To obtain a copy of a string leaving out any spacing from its start and end,
1869 use \l trimmed(). To also replace each sequence of spacing characters within
1870 the string with a simple space, \c{' '}, use \l simplified().
1871
1872 If you want to find all occurrences of a particular character or
1873 substring in a QString, use the indexOf() or lastIndexOf()
    functions. The former searches forward, the latter searches backward.
1875 Either can be told an index position from which to start their search.
1876 Each returns the index position of the character or substring if they
1877 find it; otherwise, they return -1. For example, here is a typical loop
1878 that finds all occurrences of a particular substring:
1879
1880 \snippet qstring/main.cpp 6
1881
1882 QString provides many functions for converting numbers into
1883 strings and strings into numbers. See the arg() functions, the
1884 setNum() functions, the number() static functions, and the
1885 toInt(), toDouble(), and similar functions.
1886
1887 To get an uppercase or lowercase version of a string, use toUpper() or
1888 toLower().
1889
1890 Lists of strings are handled by the QStringList class. You can
1891 split a string into a list of strings using the split() function,
1892 and join a list of strings into a single string with an optional
1893 separator using QStringList::join(). You can obtain a filtered list
1894 from a string list by selecting the entries in it that contain a
1895 particular substring or match a particular QRegularExpression.
1896 See QStringList::filter() for details.
1897
1898 \section1 Querying string data
1899
1900 To see if a QString starts or ends with a particular substring, use
1901 startsWith() or endsWith(). To check whether a QString contains a
1902 specific character or substring, use the contains() function. To
1903 find out how many times a particular character or substring occurs
1904 in a string, use count().
1905
1906 To obtain a pointer to the actual character data, call data() or
1907 constData(). These functions return a pointer to the beginning of
1908 the QChar data. The pointer is guaranteed to remain valid until a
1909 non-\c{const} function is called on the QString.
1910
1911 \section2 Comparing strings
1912
1913 QStrings can be compared using overloaded operators such as \l
1914 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1915 and so on. The comparison is based exclusively on the lexicographical
1916 order of the two strings, seen as sequences of UTF-16 code units.
1917 It is very fast but is not what a human would expect; the
1918 QString::localeAwareCompare() function is usually a better choice for
1919 sorting user-interface strings, when such a comparison is available.
1920
1921 When Qt is linked with the ICU library (which it usually is), its
1922 locale-aware sorting is used. Otherwise, platform-specific solutions
1923 are used:
1924 \list
1925 \li On Windows, localeAwareCompare() uses the current user locale,
1926 as set in the \uicontrol{regional} and \uicontrol{language}
1927 options portion of \uicontrol{Control Panel}.
1928 \li On \macos and iOS, \l localeAwareCompare() compares according
1929 to the \uicontrol{Order for sorted lists} setting in the
1930 \uicontrol{International preferences} panel.
1931 \li On other Unix-like systems, the comparison falls back to the
1932 system library's \c strcoll().
1933 \endlist
1934
1935 \section1 Converting between encoded string data and QString
1936
1937 QString provides the following functions that return a
1938 \c{const char *} version of the string as QByteArray: toUtf8(),
1939 toLatin1(), and toLocal8Bit().
1940
1941 \list
1942 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1943 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1944 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1945 Unicode character set through multibyte sequences.
1946 \li toLocal8Bit() returns an 8-bit string using the system's local
1947 encoding. This is the same as toUtf8() on Unix systems.
1948 \endlist
1949
1950 To convert from one of these encodings, QString provides
1951 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1952 encodings are supported through the QStringEncoder and QStringDecoder
1953 classes.
1954
1955 As mentioned above, QString provides a lot of functions and
1956 operators that make it easy to interoperate with \c{const char *}
1957 strings. But this functionality is a double-edged sword: It makes
1958 QString more convenient to use if all strings are US-ASCII or
1959 Latin-1, but there is always the risk that an implicit conversion
1960 from or to \c{const char *} is done using the wrong 8-bit
1961 encoding. To minimize these risks, you can turn off these implicit
1962 conversions by defining some of the following preprocessor symbols:
1963
1964 \list
1965 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1966 C string literals and pointers to Unicode.
1967 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1968 from C characters and character arrays but disables automatic
1969 conversions from character pointers to Unicode.
1970 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1971 to C strings.
1972 \endlist
1973
1974 You then need to explicitly call fromUtf8(), fromLatin1(),
1975 or fromLocal8Bit() to construct a QString from an
1976 8-bit string, or use the lightweight QLatin1StringView class. For
1977 example:
1978
1979 \snippet code/src_corelib_text_qstring.cpp 1
1980
1981 Similarly, you must call toLatin1(), toUtf8(), or
1982 toLocal8Bit() explicitly to convert the QString to an 8-bit
1983 string.
1984
1985 \table 100 %
1986 \header
1987 \li Note for C Programmers
1988
1989 \row
1990 \li
1991 Due to C++'s type system and the fact that QString is
1992 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1993 other basic types. For example:
1994
1995 \snippet qstring/main.cpp 7
1996
1997 The \c result variable is a normal variable allocated on the
1998 stack. When \c return is called, and because we're returning by
1999 value, the copy constructor is called and a copy of the string is
2000 returned. No actual copying takes place thanks to the implicit
2001 sharing.
2002
2003 \endtable
2004
2005 \section1 Distinction between null and empty strings
2006
2007 For historical reasons, QString distinguishes between null
2008 and empty strings. A \e null string is a string that is
2009 initialized using QString's default constructor or by passing
2010 \nullptr to the constructor. An \e empty string is any
2011 string with size 0. A null string is always empty, but an empty
2012 string isn't necessarily null:
2013
2014 \snippet qstring/main.cpp 8
2015
2016 All functions except isNull() treat null strings the same as empty
2017 strings. For example, toUtf8().constData() returns a valid pointer
2018 (not \nullptr) to a '\\0' character for a null string. We
2019 recommend that you always use the isEmpty() function and avoid isNull().
2020
2021 \section1 Number formats
2022
2023 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2024 qualifier, and the base is ten (its default), the default locale is
2025 used. This can be set using \l{QLocale::setDefault()}. For more refined
2026 control of localized string representations of numbers, see
2027 QLocale::toString(). All other number formatting done by QString follows the
2028 C locale's representation of numbers.
2029
2030 When QString::arg() applies left-padding to numbers, the fill character
2031 \c{'0'} is treated specially. If the number is negative, its minus sign
2032 appears before the zero-padding. If the field is localized, the
2033 locale-appropriate zero character is used in place of \c{'0'}. For
2034 floating-point numbers, this special treatment only applies if the number is
2035 finite.
2036
2037 \section2 Floating-point formats
2038
2039 In member functions (for example, arg() and number()) that format floating-point
2040 numbers (\c float or \c double) as strings, the representation used can be
2041 controlled by a choice of \e format and \e precision, whose meanings are as
2042 for \l {QLocale::toString(double, char, int)}.
2043
2044 If the selected \e format includes an exponent, localized forms follow the
2045 locale's convention on digits in the exponent. For non-localized formatting,
2046 the exponent shows its sign and includes at least two digits, left-padding
2047 with zero if needed.
2048
2049 \section1 More efficient string construction
2050
2051 Many strings are known at compile time. The QString constructor from
2052 C++ string literals will copy the contents of the string,
2053 treating the contents as UTF-8. This requires memory allocation and
2054 re-encoding string data, operations that will happen at runtime.
2055 If the string data is known at compile time, you can use the QStringLiteral
2056 macro or similarly \c{operator""_s} to create QString's payload at compile
2057 time instead.
2058
2059 Using the QString \c{'+'} operator, it is easy to construct a
2060 complex string from multiple substrings. You will often write code
2061 like this:
2062
2063 \snippet qstring/stringbuilder.cpp 0
2064
2065 There is nothing wrong with either of these string constructions,
2066 but there are a few hidden inefficiencies:
2067
2068 First, repeated use of the \c{'+'} operator may lead to
2069 multiple memory allocations. When concatenating \e{n} substrings,
2070 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2071 memory allocator.
2072
2073 These allocations can be optimized by an internal class
2074 \c{QStringBuilder}. This class is marked
2075 internal and does not appear in the documentation, because you
2076 aren't meant to instantiate it in your code. Its use will be
2077 automatic, as described below. The class is found in
2078 \c {src/corelib/tools/qstringbuilder.cpp} if you want to have a
2079 look at it.
2080
2081 \c{QStringBuilder} uses expression templates and reimplements the
2082 \c{'%'} operator so that when you use \c{'%'} for string
2083 concatenation instead of \c{'+'}, multiple substring
2084 concatenations will be postponed until the final result is about
2085 to be assigned to a QString. At this point, the amount of memory
2086 required for the final result is known. The memory allocator is
2087 then called \e{once} to get the required space, and the substrings
2088 are copied into it one by one.
2089
2090 Additional efficiency is gained by inlining and reducing reference
2091 counting (the QString created from a \c{QStringBuilder}
2092 has a ref count of 1, whereas QString::append() needs an extra
2093 test).
2094
2095 There are two ways you can access this improved method of string
2096 construction. The straightforward way is to include
2097 \c{QStringBuilder} wherever you want to use it and use the
2098 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2099
2100 \snippet qstring/stringbuilder.cpp 5
2101
2102 A more global approach, which is more convenient but not entirely
2103 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2104 it to the compiler flags) at build time. This will make concatenating
2105 strings with \c{'+'} work the same way as \c{QStringBuilder's} \c{'%'}.
2106
2107 \note Using automatic type deduction (for example, by using the \c
2108 auto keyword) with the result of string concatenation when QStringBuilder
2109 is enabled will show that the concatenation is indeed an object of a
2110 QStringBuilder specialization:
2111
2112 \snippet qstring/stringbuilder.cpp 6
2113
2114 This does not cause any harm, as QStringBuilder will implicitly convert to
2115 QString when required. If this is undesirable, then one should specify
2116 the necessary types instead of having the compiler deduce them:
2117
2118 \snippet qstring/stringbuilder.cpp 7
2119
2120 \section1 Maximum size and out-of-memory conditions
2121
2122 The maximum size of QString depends on the architecture. Most 64-bit
2123 systems can allocate more than 2 GB of memory, with a typical limit
2124 of 2^63 bytes. The actual value also depends on the overhead required for
2125 managing the data block. As a result, you can expect a maximum size
2126 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2127 on 64-bit platforms. The number of elements that can be stored in a
2128 QString is this maximum size divided by the size of QChar.
2129
2130 When memory allocation fails, QString throws a \c std::bad_alloc
2131 exception if the application was compiled with exception support.
2132 Out-of-memory conditions in Qt containers are the only cases where Qt
2133 will throw exceptions. If exceptions are disabled, then running out of
2134 memory is undefined behavior.
2135
2136 \note Target operating systems may impose limits on how much memory an
2137 application can allocate, in total, or on the size of individual allocations.
2138 This may further restrict the size of string a QString can hold.
2139 Mitigating or controlling the behavior these limits cause is beyond the
2140 scope of the Qt API.
2141
2142 \sa fromRawData(), QChar, QStringView, QLatin1StringView, QByteArray
2143*/
2144
2145/*! \typedef QString::ConstIterator
2146
2147 Qt-style synonym for QString::const_iterator.
2148*/
2149
2150/*! \typedef QString::Iterator
2151
2152 Qt-style synonym for QString::iterator.
2153*/
2154
2155/*! \typedef QString::const_iterator
2156
2157 \sa QString::iterator
2158*/
2159
2160/*! \typedef QString::iterator
2161
2162 \sa QString::const_iterator
2163*/
2164
2165/*! \typedef QString::const_reverse_iterator
2166 \since 5.6
2167
2168 \sa QString::reverse_iterator, QString::const_iterator
2169*/
2170
2171/*! \typedef QString::reverse_iterator
2172 \since 5.6
2173
2174 \sa QString::const_reverse_iterator, QString::iterator
2175*/
2176
2177/*!
2178 \typedef QString::size_type
2179*/
2180
2181/*!
2182 \typedef QString::difference_type
2183*/
2184
2185/*!
2186 \typedef QString::const_reference
2187*/
2188/*!
2189 \typedef QString::reference
2190*/
2191
2192/*!
2193 \typedef QString::const_pointer
2194
2195 The QString::const_pointer typedef provides an STL-style
2196 const pointer to a QString element (QChar).
2197*/
2198/*!
2199 \typedef QString::pointer
2200
2201 The QString::pointer typedef provides an STL-style
2202 pointer to a QString element (QChar).
2203*/
2204
2205/*!
2206 \typedef QString::value_type
2207*/
2208
2209/*! \fn QString::iterator QString::begin()
2210
2211 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2212 first character in the string.
2213
2214//! [iterator-invalidation-func-desc]
2215 \warning The returned iterator is invalidated on detachment or when the
2216 QString is modified.
2217//! [iterator-invalidation-func-desc]
2218
2219 \sa constBegin(), end()
2220*/
2221
2222/*! \fn QString::const_iterator QString::begin() const
2223
2224 \overload begin()
2225*/
2226
2227/*! \fn QString::const_iterator QString::cbegin() const
2228 \since 5.0
2229
2230 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2231 first character in the string.
2232
2233 \include qstring.cpp iterator-invalidation-func-desc
2234
2235 \sa begin(), cend()
2236*/
2237
2238/*! \fn QString::const_iterator QString::constBegin() const
2239
2240 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2241 first character in the string.
2242
2243 \include qstring.cpp iterator-invalidation-func-desc
2244
2245 \sa begin(), constEnd()
2246*/
2247
2248/*! \fn QString::iterator QString::end()
2249
2250 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2251 the last character in the string.
2252
2253 \include qstring.cpp iterator-invalidation-func-desc
2254
2255 \sa begin(), constEnd()
2256*/
2257
2258/*! \fn QString::const_iterator QString::end() const
2259
2260 \overload end()
2261*/
2262
2263/*! \fn QString::const_iterator QString::cend() const
2264 \since 5.0
2265
2266 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2267 after the last character in the string.
2268
2269 \include qstring.cpp iterator-invalidation-func-desc
2270
2271 \sa cbegin(), end()
2272*/
2273
2274/*! \fn QString::const_iterator QString::constEnd() const
2275
2276 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2277 after the last character in the string.
2278
2279 \include qstring.cpp iterator-invalidation-func-desc
2280
2281 \sa constBegin(), end()
2282*/
2283
2284/*! \fn QString::reverse_iterator QString::rbegin()
2285 \since 5.6
2286
2287 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2288 the first character in the string, in reverse order.
2289
2290 \include qstring.cpp iterator-invalidation-func-desc
2291
2292 \sa begin(), crbegin(), rend()
2293*/
2294
2295/*! \fn QString::const_reverse_iterator QString::rbegin() const
2296 \since 5.6
2297 \overload
2298*/
2299
2300/*! \fn QString::const_reverse_iterator QString::crbegin() const
2301 \since 5.6
2302
2303 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2304 pointing to the first character in the string, in reverse order.
2305
2306 \include qstring.cpp iterator-invalidation-func-desc
2307
2308 \sa begin(), rbegin(), rend()
2309*/
2310
2311/*! \fn QString::reverse_iterator QString::rend()
2312 \since 5.6
2313
2314 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2315 after the last character in the string, in reverse order.
2316
2317 \include qstring.cpp iterator-invalidation-func-desc
2318
2319 \sa end(), crend(), rbegin()
2320*/
2321
2322/*! \fn QString::const_reverse_iterator QString::rend() const
2323 \since 5.6
2324 \overload
2325*/
2326
2327/*! \fn QString::const_reverse_iterator QString::crend() const
2328 \since 5.6
2329
2330 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2331 pointing just after the last character in the string, in reverse order.
2332
2333 \include qstring.cpp iterator-invalidation-func-desc
2334
2335 \sa end(), rend(), rbegin()
2336*/
2337
2338/*!
2339 \fn QString::QString()
2340
2341 Constructs a null string. Null strings are also considered empty.
2342
2343 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2344*/
2345
2346/*!
2347 \fn QString::QString(QString &&other)
2348
2349 Move-constructs a QString instance, making it point at the same
2350 object that \a other was pointing to.
2351
2352 \since 5.2
2353*/
2354
2355/*! \fn QString::QString(const char *str)
2356
2357 Constructs a string initialized with the 8-bit string \a str. The
2358 given const char pointer is converted to Unicode using the
2359 fromUtf8() function.
2360
2361 You can disable this constructor by defining
2362 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2363 can be useful if you want to ensure that all user-visible strings
2364 go through QObject::tr(), for example.
2365
2366 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2367 this constructor, but enables a \c{QString(const char (&ch)[N])}
2368 constructor instead. Using non-literal input, or input with
2369 embedded NUL characters, or non-7-bit characters is undefined
2370 in this case.
2371
2372 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2373*/
2374
2375/*! \fn QString::QString(const char8_t *str)
2376
2377 Constructs a string initialized with the UTF-8 string \a str. The
2378 given const char8_t pointer is converted to Unicode using the
2379 fromUtf8() function.
2380
2381 \since 6.1
2382 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2383*/
2384
2385/*
2386//! [from-std-string]
2387Returns a copy of the \a str string. The given string is assumed to be
2388encoded in \1, and is converted to QString using the \2 function.
2389//! [from-std-string]
2390*/
2391
2392/*! \fn QString QString::fromStdString(const std::string &str)
2393
2394 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2395
2396 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2397*/
2398
2399/*! \fn QString QString::fromStdWString(const std::wstring &str)
2400
    Returns a copy of the \a str string. The given string is assumed
    to be encoded in UTF-16 if the size of wchar_t is 2 bytes (for example,
    on Windows) and in UCS-4 if the size of wchar_t is 4 bytes (most Unix
    systems).
2405
2406 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2407 fromStdU16String(), fromStdU32String()
2408*/
2409
2410/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2411 \since 4.2
2412
    Returns a copy of the \a string, where the encoding of \a string depends on
    the size of wchar_t. If wchar_t is 4 bytes, the \a string is interpreted as
    UCS-4; if wchar_t is 2 bytes, it is interpreted as UTF-16.
2416
2417 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2418
2419 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2420 fromStdWString()
2421*/
2422
2423/*! \fn std::wstring QString::toStdWString() const
2424
2425 Returns a std::wstring object with the data contained in this
2426 QString. The std::wstring is encoded in UTF-16 on platforms where
2427 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2428 where wchar_t is 4 bytes wide (most Unix systems).
2429
2430 This method is mostly useful to pass a QString to a function
2431 that accepts a std::wstring object.
2432
2433 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2434 toStdU32String()
2435*/
2436
2437qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2438{
2439 qsizetype count = 0;
2440
2441 QStringIterator i(QStringView(uc, length));
2442 while (i.hasNext())
2443 out[count++] = i.next();
2444
2445 return count;
2446}
2447
2448/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2449 \since 4.2
2450
2451 Fills the \a array with the data contained in this QString object.
    The array is encoded in UTF-16 on platforms where
    wchar_t is 2 bytes wide (for example, Windows) and in UCS-4 on platforms
    where wchar_t is 4 bytes wide (most Unix systems).
2455
2456 \a array has to be allocated by the caller and contain enough space to
2457 hold the complete string (allocating the array with the same length as the
2458 string is always sufficient).
2459
2460 This function returns the actual length of the string in \a array.
2461
2462 \note This function does not append a null character to the array.
2463
2464 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2465 QStringView::toWCharArray()
2466*/
2467
2468/*! \fn QString::QString(const QString &other)
2469
2470 Constructs a copy of \a other.
2471
2472 This operation takes \l{constant time}, because QString is
2473 \l{implicitly shared}. This makes returning a QString from a
2474 function very fast. If a shared instance is modified, it will be
2475 copied (copy-on-write), and that takes \l{linear time}.
2476
2477 \sa operator=()
2478*/
2479
2480/*!
2481 Constructs a string initialized with the first \a size characters
2482 of the QChar array \a unicode.
2483
    If \a unicode is \nullptr, a null string is constructed.
2485
    If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
    array and its length is determined dynamically. The terminating
    null character is not considered part of the string.
2489
2490 QString makes a deep copy of the string data. The unicode data is copied as
2491 is and the Byte Order Mark is preserved if present.
2492
2493 \sa fromRawData()
2494*/
2495QString::QString(const QChar *unicode, qsizetype size)
2496{
2497 if (!unicode) {
2498 d.clear();
2499 } else {
2500 if (size < 0)
2501 size = QtPrivate::qustrlen(str: reinterpret_cast<const char16_t *>(unicode));
2502 if (!size) {
2503 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2504 } else {
2505 d = DataPointer(size, size);
2506 Q_CHECK_PTR(d.data());
2507 memcpy(dest: d.data(), src: unicode, n: size * sizeof(QChar));
2508 d.data()[size] = '\0';
2509 }
2510 }
2511}
2512
2513/*!
2514 Constructs a string of the given \a size with every character set
2515 to \a ch.
2516
2517 \sa fill()
2518*/
2519QString::QString(qsizetype size, QChar ch)
2520{
2521 if (size <= 0) {
2522 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2523 } else {
2524 d = DataPointer(size, size);
2525 Q_CHECK_PTR(d.data());
2526 d.data()[size] = '\0';
2527 char16_t *b = d.data();
2528 char16_t *e = d.data() + size;
2529 const char16_t value = ch.unicode();
2530 std::fill(first: b, last: e, value: value);
2531 }
2532}
2533
2534/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2535 \internal
2536
2537 Constructs a string of the given \a size without initializing the
2538 characters. This is only used in \c QStringBuilder::toString().
2539*/
2540QString::QString(qsizetype size, Qt::Initialization)
2541{
2542 if (size <= 0) {
2543 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2544 } else {
2545 d = DataPointer(size, size);
2546 Q_CHECK_PTR(d.data());
2547 d.data()[size] = '\0';
2548 }
2549}
2550
2551/*! \fn QString::QString(QLatin1StringView str)
2552
2553 Constructs a copy of the Latin-1 string viewed by \a str.
2554
2555 \sa fromLatin1()
2556*/
2557
2558/*!
2559 Constructs a string of size 1 containing the character \a ch.
2560*/
2561QString::QString(QChar ch)
2562{
2563 d = DataPointer(1, 1);
2564 Q_CHECK_PTR(d.data());
2565 d.data()[0] = ch.unicode();
2566 d.data()[1] = '\0';
2567}
2568
2569/*! \fn QString::QString(const QByteArray &ba)
2570
2571 Constructs a string initialized with the byte array \a ba. The
2572 given byte array is converted to Unicode using fromUtf8().
2573
2574 You can disable this constructor by defining
2575 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2576 can be useful if you want to ensure that all user-visible strings
2577 go through QObject::tr(), for example.
2578
2579 \note Any null ('\\0') bytes in the byte array will be included in this
2580 string, converted to Unicode null characters (U+0000). This behavior is
2581 different from Qt 5.x.
2582
2583 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2584*/
2585
2586/*! \fn QString::QString(const Null &)
2587 \internal
2588*/
2589
2590/*! \fn QString::QString(QStringPrivate)
2591 \internal
2592*/
2593
2594/*! \fn QString &QString::operator=(const QString::Null &)
2595 \internal
2596*/
2597
2598/*!
2599 \fn QString::~QString()
2600
2601 Destroys the string.
2602*/
2603
2604
2605/*! \fn void QString::swap(QString &other)
2606 \since 4.8
2607
2608 Swaps string \a other with this string. This operation is very fast and
2609 never fails.
2610*/
2611
2612/*! \fn void QString::detach()
2613
2614 \internal
2615*/
2616
2617/*! \fn bool QString::isDetached() const
2618
2619 \internal
2620*/
2621
2622/*! \fn bool QString::isSharedWith(const QString &other) const
2623
2624 \internal
2625*/
2626
2627/*! \fn QString::operator std::u16string_view() const
2628 \since 6.7
2629
2630 Converts this QString object to a \c{std::u16string_view} object.
2631*/
2632
2633static bool needsReallocate(const QString &str, qsizetype newSize)
2634{
2635 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2636 return newSize > capacityAtEnd;
2637}
2638
2639/*!
2640 Sets the size of the string to \a size characters.
2641
2642 If \a size is greater than the current size, the string is
2643 extended to make it \a size characters long with the extra
2644 characters added to the end. The new characters are uninitialized.
2645
2646 If \a size is less than the current size, characters beyond position
2647 \a size are excluded from the string.
2648
2649 \note While resize() will grow the capacity if needed, it never shrinks
2650 capacity. To shed excess capacity, use squeeze().
2651
2652 Example:
2653
2654 \snippet qstring/main.cpp 45
2655
2656 If you want to append a certain number of identical characters to
2657 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2658
    If you want to expand the string so that it reaches a certain
    width and fill the new positions with a particular character, use
    the leftJustified() function, as shown in the last example below.
2662
2663 If \a size is negative, it is equivalent to passing zero.
2664
2665 \snippet qstring/main.cpp 47
2666
2667 \sa truncate(), reserve(), squeeze()
2668*/
2669
2670void QString::resize(qsizetype size)
2671{
2672 if (size < 0)
2673 size = 0;
2674
2675 if (d->needsDetach() || needsReallocate(str: *this, newSize: size))
2676 reallocData(alloc: size, option: QArrayData::Grow);
2677 d.size = size;
2678 if (d->allocatedCapacity())
2679 d.data()[size] = u'\0';
2680}
2681
2682/*!
2683 \overload
2684 \since 5.7
2685
2686 Unlike \l {QString::}{resize(qsizetype)}, this overload
2687 initializes the new characters to \a fillChar:
2688
2689 \snippet qstring/main.cpp 46
2690*/
2691
2692void QString::resize(qsizetype newSize, QChar fillChar)
2693{
2694 const qsizetype oldSize = size();
2695 resize(size: newSize);
2696 const qsizetype difference = size() - oldSize;
2697 if (difference > 0)
2698 std::fill_n(first: d.data() + oldSize, n: difference, value: fillChar.unicode());
2699}
2700
2701
2702/*!
2703 \since 6.8
2704
2705 Sets the size of the string to \a size characters. If the size of
2706 the string grows, the new characters are uninitialized.
2707
2708 The behavior is identical to \c{resize(size)}.
2709
2710 \sa resize()
2711*/
2712
2713void QString::resizeForOverwrite(qsizetype size)
2714{
2715 resize(size);
2716}
2717
2718
2719/*! \fn qsizetype QString::capacity() const
2720
2721 Returns the maximum number of characters that can be stored in
2722 the string without forcing a reallocation.
2723
2724 The sole purpose of this function is to provide a means of fine
2725 tuning QString's memory usage. In general, you will rarely ever
2726 need to call this function. If you want to know how many
2727 characters are in the string, call size().
2728
2729 \note a statically allocated string will report a capacity of 0,
2730 even if it's not empty.
2731
2732 \note The free space position in the allocated memory block is undefined. In
2733 other words, one should not assume that the free memory is always located
2734 after the initialized elements.
2735
2736 \sa reserve(), squeeze()
2737*/
2738
2739/*!
2740 \fn void QString::reserve(qsizetype size)
2741
2742 Ensures the string has space for at least \a size characters.
2743
2744 If you know in advance how large a string will be, you can call this
2745 function to save repeated reallocation while building it.
2746 This can improve performance when building a string incrementally.
2747 A long sequence of operations that add to a string may trigger several
2748 reallocations, the last of which may leave you with significantly more
2749 space than you need. This is less efficient than doing a single
2750 allocation of the right size at the start.
2751
2752 If in doubt about how much space shall be needed, it is usually better to
2753 use an upper bound as \a size, or a high estimate of the most likely size,
2754 if a strict upper bound would be much bigger than this. If \a size is an
2755 underestimate, the string will grow as needed once the reserved size is
2756 exceeded, which may lead to a larger allocation than your best
2757 overestimate would have and will slow the operation that triggers it.
2758
2759 \warning reserve() reserves memory but does not change the size of the
2760 string. Accessing data beyond the end of the string is undefined behavior.
2761 If you need to access memory beyond the current end of the string,
2762 use resize().
2763
2764 This function is useful for code that needs to build up a long
2765 string and wants to avoid repeated reallocation. In this example,
2766 we want to add to the string until some condition is \c true, and
2767 we're fairly sure that size is large enough to make a call to
2768 reserve() worthwhile:
2769
2770 \snippet qstring/main.cpp 44
2771
2772 \sa squeeze(), capacity(), resize()
2773*/
2774
2775/*!
2776 \fn void QString::squeeze()
2777
2778 Releases any memory not required to store the character data.
2779
2780 The sole purpose of this function is to provide a means of fine
2781 tuning QString's memory usage. In general, you will rarely ever
2782 need to call this function.
2783
2784 \sa reserve(), capacity()
2785*/
2786
2787void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2788{
2789 if (!alloc) {
2790 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
2791 return;
2792 }
2793
2794 // don't use reallocate path when reducing capacity and there's free space
2795 // at the beginning: might shift data pointer outside of allocated space
2796 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2797
2798 if (d->needsDetach() || cannotUseReallocate) {
2799 DataPointer dd(alloc, qMin(a: alloc, b: d.size), option);
2800 Q_CHECK_PTR(dd.data());
2801 if (dd.size > 0)
2802 ::memcpy(dest: dd.data(), src: d.data(), n: dd.size * sizeof(QChar));
2803 dd.data()[dd.size] = 0;
2804 d = dd;
2805 } else {
2806 d->reallocate(alloc, option);
2807 }
2808}
2809
2810void QString::reallocGrowData(qsizetype n)
2811{
2812 if (!n) // expected to always allocate
2813 n = 1;
2814
2815 if (d->needsDetach()) {
2816 DataPointer dd(DataPointer::allocateGrow(from: d, n, position: QArrayData::GrowsAtEnd));
2817 Q_CHECK_PTR(dd.data());
2818 dd->copyAppend(b: d.data(), e: d.data() + d.size);
2819 dd.data()[dd.size] = 0;
2820 d = dd;
2821 } else {
2822 d->reallocate(alloc: d.constAllocatedCapacity() + n, option: QArrayData::Grow);
2823 }
2824}
2825
2826/*! \fn void QString::clear()
2827
2828 Clears the contents of the string and makes it null.
2829
2830 \sa resize(), isNull()
2831*/
2832
2833/*! \fn QString &QString::operator=(const QString &other)
2834
2835 Assigns \a other to this string and returns a reference to this
2836 string.
2837*/
2838
2839QString &QString::operator=(const QString &other) noexcept
2840{
2841 d = other.d;
2842 return *this;
2843}
2844
2845/*!
2846 \fn QString &QString::operator=(QString &&other)
2847
2848 Move-assigns \a other to this QString instance.
2849
2850 \since 5.2
2851*/
2852
2853/*! \fn QString &QString::operator=(QLatin1StringView str)
2854
2855 \overload operator=()
2856
2857 Assigns the Latin-1 string viewed by \a str to this string.
2858*/
2859QString &QString::operator=(QLatin1StringView other)
2860{
2861 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2862 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2863 d.size = other.size();
2864 d.data()[other.size()] = 0;
2865 qt_from_latin1(dst: d.data(), str: other.latin1(), size: other.size());
2866 } else {
2867 *this = fromLatin1(str: other.latin1(), size: other.size());
2868 }
2869 return *this;
2870}
2871
2872/*! \fn QString &QString::operator=(const QByteArray &ba)
2873
2874 \overload operator=()
2875
2876 Assigns \a ba to this string. The byte array is converted to Unicode
2877 using the fromUtf8() function.
2878
2879 You can disable this operator by defining
2880 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2881 can be useful if you want to ensure that all user-visible strings
2882 go through QObject::tr(), for example.
2883*/
2884
2885/*! \fn QString &QString::operator=(const char *str)
2886
2887 \overload operator=()
2888
2889 Assigns \a str to this string. The const char pointer is converted
2890 to Unicode using the fromUtf8() function.
2891
2892 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2893 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2894 This can be useful if you want to ensure that all user-visible strings
2895 go through QObject::tr(), for example.
2896*/
2897
2898/*!
2899 \overload operator=()
2900
2901 Sets the string to contain the single character \a ch.
2902*/
2903QString &QString::operator=(QChar ch)
2904{
2905 return assign(n: 1, c: ch);
2906}
2907
2908/*!
2909 \fn QString& QString::insert(qsizetype position, const QString &str)
2910
2911 Inserts the string \a str at the given index \a position and
2912 returns a reference to this string.
2913
2914 Example:
2915
2916 \snippet qstring/main.cpp 26
2917
2918//! [string-grow-at-insertion]
2919 This string grows to accommodate the insertion. If \a position is beyond
2920 the end of the string, space characters are appended to the string to reach
2921 this \a position, followed by \a str.
2922//! [string-grow-at-insertion]
2923
2924 \sa append(), prepend(), replace(), remove()
2925*/
2926
2927/*!
2928 \fn QString& QString::insert(qsizetype position, QStringView str)
2929 \since 6.0
2930 \overload insert()
2931
2932 Inserts the string view \a str at the given index \a position and
2933 returns a reference to this string.
2934
2935 \include qstring.cpp string-grow-at-insertion
2936*/
2937
2938
2939/*!
2940 \fn QString& QString::insert(qsizetype position, const char *str)
2941 \since 5.5
2942 \overload insert()
2943
2944 Inserts the C string \a str at the given index \a position and
2945 returns a reference to this string.
2946
2947 \include qstring.cpp string-grow-at-insertion
2948
2949 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2950 defined.
2951*/
2952
2953/*!
2954 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2955 \since 5.5
2956 \overload insert()
2957
2958 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2959 it encodes at the given index \a position and returns a reference to
2960 this string.
2961
2962 \include qstring.cpp string-grow-at-insertion
2963
2964 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2965 defined.
2966*/
2967
2968/*! \internal
2969 T is a view or a container on/of QChar, char16_t, or char
2970*/
2971template <typename T>
2972static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2973{
2974 auto &str_d = str.data_ptr();
2975 qsizetype difference = 0;
2976 if (Q_UNLIKELY(i > str_d.size))
2977 difference = i - str_d.size;
2978 const qsizetype oldSize = str_d.size;
2979 const qsizetype insert_size = toInsert.size();
2980 const qsizetype newSize = str_d.size + difference + insert_size;
2981 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2982
2983 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2984 const auto cbegin = str.cbegin();
2985 const auto cend = str.cend();
2986 const auto insert_start = difference == 0 ? std::next(x: cbegin, n: i) : cend;
2987 QString other;
2988 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2989 // unittests pass
2990 other.data_ptr().detachAndGrow(where: side, n: newSize, data: nullptr, old: nullptr);
2991 other.append(v: QStringView(cbegin, insert_start));
2992 other.resize(newSize: i, fillChar: u' ');
2993 other.append(toInsert);
2994 other.append(v: QStringView(insert_start, cend));
2995 str.swap(other);
2996 return;
2997 }
2998
2999 str_d.detachAndGrow(where: side, n: difference + insert_size, data: nullptr, old: nullptr);
3000 Q_CHECK_PTR(str_d.data());
3001 str.resize(size: newSize);
3002
3003 auto begin = str_d.begin();
3004 auto old_end = std::next(x: begin, n: oldSize);
3005 std::fill_n(first: old_end, n: difference, value: u' ');
3006 auto insert_start = std::next(x: begin, n: i);
3007 if (difference == 0)
3008 std::move_backward(first: insert_start, last: old_end, result: str_d.end());
3009
3010 using Char = std::remove_cv_t<typename T::value_type>;
3011 if constexpr(std::is_same_v<Char, QChar>)
3012 std::copy_n(first: reinterpret_cast<const char16_t *>(toInsert.data()), n: insert_size, result: insert_start);
3013 else if constexpr (std::is_same_v<Char, char16_t>)
3014 std::copy_n(toInsert.data(), insert_size, insert_start);
3015 else if constexpr (std::is_same_v<Char, char>)
3016 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3017}
3018
3019/*!
3020 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3021 \overload insert()
3022
3023 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3024
3025 \include qstring.cpp string-grow-at-insertion
3026*/
3027QString &QString::insert(qsizetype i, QLatin1StringView str)
3028{
3029 const char *s = str.latin1();
3030 if (i < 0 || !s || !(*s))
3031 return *this;
3032
3033 insert_helper(str&: *this, i, toInsert: str);
3034 return *this;
3035}
3036
3037/*!
3038 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3039 \overload insert()
3040 \since 6.5
3041
3042 Inserts the UTF-8 string view \a str at the given index \a position.
3043
3044 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3045 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3046 (QLatin1StringView) and should thus be used sparingly.
3047
3048 \include qstring.cpp string-grow-at-insertion
3049*/
3050QString &QString::insert(qsizetype i, QUtf8StringView s)
3051{
3052 auto insert_size = s.size();
3053 if (i < 0 || insert_size <= 0)
3054 return *this;
3055
3056 qsizetype difference = 0;
3057 if (Q_UNLIKELY(i > d.size))
3058 difference = i - d.size;
3059
3060 const qsizetype newSize = d.size + difference + insert_size;
3061
3062 if (d.needsDetach() || needsReallocate(str: *this, newSize)) {
3063 const auto cbegin = this->cbegin();
3064 const auto insert_start = difference == 0 ? std::next(x: cbegin, n: i) : cend();
3065 QString other;
3066 other.reserve(asize: newSize);
3067 other.append(v: QStringView(cbegin, insert_start));
3068 if (difference > 0)
3069 other.resize(newSize: i, fillChar: u' ');
3070 other.append(s);
3071 other.append(v: QStringView(insert_start, cend()));
3072 swap(other);
3073 return *this;
3074 }
3075
3076 if (i >= d.size) {
3077 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: difference + insert_size, data: nullptr, old: nullptr);
3078 Q_CHECK_PTR(d.data());
3079
3080 if (difference > 0)
3081 resize(newSize: i, fillChar: u' ');
3082 append(s);
3083 } else {
3084 // Optimal insertion of Utf8 data is at the end, anywhere else could
3085 // potentially lead to moving characters twice if Utf8 data size
3086 // (variable-width) is less than the equivalent Utf16 data size
3087 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3088 char16_t *b = QUtf8::convertToUnicode(dst: buffer.data(), in: s);
3089 insert_helper(str&: *this, i, toInsert: QStringView(buffer.data(), b));
3090 }
3091
3092 return *this;
3093}
3094
3095/*!
3096 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3097 \overload insert()
3098
3099 Inserts the first \a size characters of the QChar array \a unicode
3100 at the given index \a position in the string.
3101
3102 This string grows to accommodate the insertion. If \a position is beyond
3103 the end of the string, space characters are appended to the string to reach
3104 this \a position, followed by \a size characters of the QChar array
3105 \a unicode.
3106*/
3107QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3108{
3109 if (i < 0 || size <= 0)
3110 return *this;
3111
3112 // In case when data points into "this"
3113 if (!d->needsDetach() && QtPrivate::q_points_into_range(p: unicode, c: *this)) {
3114 QVarLengthArray copy(unicode, unicode + size);
3115 insert(i, unicode: copy.data(), size);
3116 } else {
3117 insert_helper(str&: *this, i, toInsert: QStringView(unicode, size));
3118 }
3119
3120 return *this;
3121}
3122
3123/*!
3124 \fn QString& QString::insert(qsizetype position, QChar ch)
3125 \overload insert()
3126
3127 Inserts \a ch at the given index \a position in the string.
3128
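    This string grows to accommodate the insertion. If \a position is beyond
    the end of the string, space characters are appended to the string to reach
    this \a position, followed by \a ch. If \a position is negative, the size
    of the string is added to it before inserting; if the result is still
    negative, the string is left unchanged.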
3132*/
3133
3134QString& QString::insert(qsizetype i, QChar ch)
3135{
3136 if (i < 0)
3137 i += d.size;
3138 return insert(i, unicode: &ch, size: 1);
3139}
3140
3141/*!
3142 Appends the string \a str onto the end of this string.
3143
3144 Example:
3145
3146 \snippet qstring/main.cpp 9
3147
3148 This is the same as using the insert() function:
3149
3150 \snippet qstring/main.cpp 10
3151
3152 The append() function is typically very fast (\l{constant time}),
3153 because QString preallocates extra space at the end of the string
3154 data so it can grow without reallocating the entire string each
3155 time.
3156
3157 \sa operator+=(), prepend(), insert()
3158*/
3159QString &QString::append(const QString &str)
3160{
3161 if (!str.isNull()) {
3162 if (isNull()) {
3163 if (Q_UNLIKELY(!str.d.isMutable()))
3164 assign(s: str); // fromRawData, so we do a deep copy
3165 else
3166 operator=(other: str);
3167 } else if (str.size()) {
3168 append(uc: str.constData(), len: str.size());
3169 }
3170 }
3171 return *this;
3172}
3173
3174/*!
3175 \fn QString &QString::append(QStringView v)
3176 \overload append()
3177 \since 6.0
3178
3179 Appends the given string view \a v to this string and returns the result.
3180*/
3181
3182/*!
3183 \overload append()
3184 \since 5.0
3185
3186 Appends \a len characters from the QChar array \a str to this string.
3187*/
3188QString &QString::append(const QChar *str, qsizetype len)
3189{
3190 if (str && len > 0) {
3191 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3192 // the following should be safe as QChar uses char16_t as underlying data
3193 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3194 d->growAppend(b: char16String, e: char16String + len);
3195 d.data()[d.size] = u'\0';
3196 }
3197 return *this;
3198}
3199
3200/*!
3201 \overload append()
3202
3203 Appends the Latin-1 string viewed by \a str to this string.
3204*/
3205QString &QString::append(QLatin1StringView str)
3206{
3207 append_helper(self&: *this, view: str);
3208 return *this;
3209}
3210
3211/*!
3212 \overload append()
3213 \since 6.5
3214
3215 Appends the UTF-8 string view \a str to this string.
3216*/
3217QString &QString::append(QUtf8StringView str)
3218{
3219 append_helper(self&: *this, view: str);
3220 return *this;
3221}
3222
3223/*! \fn QString &QString::append(const QByteArray &ba)
3224
3225 \overload append()
3226
3227 Appends the byte array \a ba to this string. The given byte array
3228 is converted to Unicode using the fromUtf8() function.
3229
3230 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3231 when you compile your applications. This can be useful if you want
3232 to ensure that all user-visible strings go through QObject::tr(),
3233 for example.
3234*/
3235
3236/*! \fn QString &QString::append(const char *str)
3237
3238 \overload append()
3239
3240 Appends the string \a str to this string. The given const char
3241 pointer is converted to Unicode using the fromUtf8() function.
3242
3243 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3244 when you compile your applications. This can be useful if you want
3245 to ensure that all user-visible strings go through QObject::tr(),
3246 for example.
3247*/
3248
3249/*!
3250 \overload append()
3251
3252 Appends the character \a ch to this string.
3253*/
3254QString &QString::append(QChar ch)
3255{
3256 d.detachAndGrow(where: QArrayData::GrowsAtEnd, n: 1, data: nullptr, old: nullptr);
3257 d->copyAppend(n: 1, t: ch.unicode());
3258 d.data()[d.size] = '\0';
3259 return *this;
3260}
3261
3262/*! \fn QString &QString::prepend(const QString &str)
3263
3264 Prepends the string \a str to the beginning of this string and
3265 returns a reference to this string.
3266
3267 This operation is typically very fast (\l{constant time}), because
3268 QString preallocates extra space at the beginning of the string data,
3269 so it can grow without reallocating the entire string each time.
3270
3271 Example:
3272
3273 \snippet qstring/main.cpp 36
3274
3275 \sa append(), insert()
3276*/
3277
3278/*! \fn QString &QString::prepend(QLatin1StringView str)
3279
3280 \overload prepend()
3281
3282 Prepends the Latin-1 string viewed by \a str to this string.
3283*/
3284
3285/*! \fn QString &QString::prepend(QUtf8StringView str)
3286 \since 6.5
3287 \overload prepend()
3288
3289 Prepends the UTF-8 string view \a str to this string.
3290*/
3291
3292/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3293 \since 5.5
3294 \overload prepend()
3295
3296 Prepends \a len characters from the QChar array \a str to this string and
3297 returns a reference to this string.
3298*/
3299
3300/*! \fn QString &QString::prepend(QStringView str)
3301 \since 6.0
3302 \overload prepend()
3303
3304 Prepends the string view \a str to the beginning of this string and
3305 returns a reference to this string.
3306*/
3307
3308/*! \fn QString &QString::prepend(const QByteArray &ba)
3309
3310 \overload prepend()
3311
3312 Prepends the byte array \a ba to this string. The byte array is
3313 converted to Unicode using the fromUtf8() function.
3314
3315 You can disable this function by defining
3316 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3317 can be useful if you want to ensure that all user-visible strings
3318 go through QObject::tr(), for example.
3319*/
3320
3321/*! \fn QString &QString::prepend(const char *str)
3322
3323 \overload prepend()
3324
3325 Prepends the string \a str to this string. The const char pointer
3326 is converted to Unicode using the fromUtf8() function.
3327
3328 You can disable this function by defining
3329 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3330 can be useful if you want to ensure that all user-visible strings
3331 go through QObject::tr(), for example.
3332*/
3333
3334/*! \fn QString &QString::prepend(QChar ch)
3335
3336 \overload prepend()
3337
3338 Prepends the character \a ch to this string.
3339*/
3340
3341/*!
3342 \fn QString &QString::assign(QAnyStringView v)
3343 \since 6.6
3344
3345 Replaces the contents of this string with a copy of \a v and returns a
3346 reference to this string.
3347
3348 The size of this string will be equal to the size of \a v, converted to
3349 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3350 this function only allocates memory if the estimated size exceeds the capacity
3351 of this string or this string is shared.
3352
3353 \sa QAnyStringView::toString()
3354*/
3355
3356/*!
3357 \fn QString &QString::assign(qsizetype n, QChar c)
3358 \since 6.6
3359
3360 Replaces the contents of this string with \a n copies of \a c and
3361 returns a reference to this string.
3362
3363 The size of this string will be equal to \a n, which has to be non-negative.
3364
3365 This function will only allocate memory if \a n exceeds the capacity of this
3366 string or this string is shared.
3367
3368 \sa fill()
3369*/
3370
3371/*!
3372 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3373 \since 6.6
3374
3375 Replaces the contents of this string with a copy of the elements in the
3376 iterator range [\a first, \a last) and returns a reference to this string.
3377
3378 The size of this string will be equal to the decoded length of the elements
3379 in the range [\a first, \a last), which need not be the same as the length of
3380 the range itself, because this function transparently recodes the input
3381 character set to UTF-16.
3382
3383 This function will only allocate memory if the number of elements in the
3384 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3385 resulting string, exceeds the capacity of this string, or if this string is
3386 shared.
3387
3388 \note This function overload only participates in overload resolution if
3389 \c InputIterator meets the requirements of a
3390 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3391 and the \c{value_type} of \c InputIterator is one of the following character types:
3392 \list
3393 \li QChar
3394 \li QLatin1Char
3395 \li \c {char}
3396 \li \c {unsigned char}
3397 \li \c {signed char}
3398 \li \c {char8_t}
3399 \li \c char16_t
3400 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3401 \li \c char32_t
3402 \endlist
3403
3404 \note The behavior is undefined if either argument is an iterator into *this or
3405 [\a first, \a last) is not a valid range.
3406*/
3407
3408QString &QString::assign(QAnyStringView s)
3409{
3410 if (s.size() <= capacity() && isDetached()) {
3411 const auto offset = d.freeSpaceAtBegin();
3412 if (offset)
3413 d.setBegin(d.begin() - offset);
3414 resize(size: 0);
3415 s.visit(v: [this](auto input) {
3416 this->append(input);
3417 });
3418 } else {
3419 *this = s.toString();
3420 }
3421 return *this;
3422}
3423
3424#ifndef QT_BOOTSTRAPPED
3425QString &QString::assign_helper(const char32_t *data, qsizetype len)
3426{
3427 // worst case: each char32_t requires a surrogate pair, so
3428 const auto requiredCapacity = len * 2;
3429 if (requiredCapacity <= capacity() && isDetached()) {
3430 const auto offset = d.freeSpaceAtBegin();
3431 if (offset)
3432 d.setBegin(d.begin() - offset);
3433 auto begin = reinterpret_cast<QChar *>(d.begin());
3434 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3435 QStringConverter::State state;
3436 const auto end = QUtf32::convertToUnicode(out: begin, ba, state: &state, endian: DetectEndianness);
3437 d.size = end - begin;
3438 d.data()[d.size] = u'\0';
3439 } else {
3440 *this = QString::fromUcs4(data, size: len);
3441 }
3442 return *this;
3443}
3444#endif
3445
3446/*!
3447 \fn QString &QString::remove(qsizetype position, qsizetype n)
3448
3449 Removes \a n characters from the string, starting at the given \a
3450 position index, and returns a reference to the string.
3451
3452 If the specified \a position index is within the string, but \a
3453 position + \a n is beyond the end of the string, the string is
3454 truncated at the specified \a position.
3455
3456 If \a n is <= 0 nothing is changed.
3457
3458 \snippet qstring/main.cpp 37
3459
3460//! [shrinking-erase]
3461 Element removal will preserve the string's capacity and not reduce the
3462 amount of allocated memory. To shed extra capacity and free as much memory
3463 as possible, call squeeze() after the last change to the string's size.
3464//! [shrinking-erase]
3465
3466 \sa insert(), replace()
3467*/
3468QString &QString::remove(qsizetype pos, qsizetype len)
3469{
3470 if (pos < 0) // count from end of string
3471 pos += size();
3472
3473 if (size_t(pos) >= size_t(size()) || len <= 0)
3474 return *this;
3475
3476 len = std::min(a: len, b: size() - pos);
3477
3478 if (!d->isShared()) {
3479 d->erase(b: d.begin() + pos, n: len);
3480 d.data()[d.size] = u'\0';
3481 } else {
3482 // TODO: either reserve "size()", which is bigger than needed, or
3483 // modify the shrinking-erase docs of this method (since the size
3484 // of "copy" won't have any extra capacity any more)
3485 const qsizetype sz = size() - len;
3486 QString copy{sz, Qt::Uninitialized};
3487 auto begin = d.begin();
3488 auto toRemove_start = d.begin() + pos;
3489 copy.d->copyRanges(ranges: {{.begin: begin, .end: toRemove_start},
3490 {.begin: toRemove_start + len, .end: d.end()}});
3491 swap(other&: copy);
3492 }
3493 return *this;
3494}
3495
3496template<typename T>
3497static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3498{
3499 const auto needleSize = needle.size();
3500 if (!needleSize)
3501 return;
3502
3503 // avoid detach if nothing to do:
3504 qsizetype i = s.indexOf(needle, 0, cs);
3505 if (i < 0)
3506 return;
3507
3508 QString::DataPointer &dptr = s.data_ptr();
3509 auto begin = dptr.begin();
3510 auto end = dptr.end();
3511
3512 auto copyFunc = [&](auto &dst) {
3513 auto src = begin + i + needleSize;
3514 while (src < end) {
3515 i = s.indexOf(needle, std::distance(begin, src), cs);
3516 auto hit = i == -1 ? end : begin + i;
3517 dst = std::copy(src, hit, dst);
3518 src = hit + needleSize;
3519 }
3520 return dst;
3521 };
3522
3523 if (!dptr->needsDetach()) {
3524 auto dst = begin + i;
3525 dst = copyFunc(dst);
3526 s.truncate(pos: std::distance(first: begin, last: dst));
3527 } else {
3528 QString copy{s.size(), Qt::Uninitialized};
3529 auto copy_begin = copy.begin();
3530 auto dst = std::copy(first: begin, last: begin + i, result: copy_begin); // Chunk before the first hit
3531 dst = copyFunc(dst);
3532 copy.resize(size: std::distance(first: copy_begin, last: dst));
3533 s.swap(other&: copy);
3534 }
3535}
3536
3537/*!
3538 Removes every occurrence of the given \a str string in this
3539 string, and returns a reference to this string.
3540
3541 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3542
3543 This is the same as \c replace(str, "", cs).
3544
3545 \include qstring.cpp shrinking-erase
3546
3547 \sa replace()
3548*/
3549QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3550{
3551 const auto s = str.d.data();
3552 if (QtPrivate::q_points_into_range(p: s, c: d))
3553 removeStringImpl(s&: *this, needle: QStringView{QVarLengthArray(s, s + str.size())}, cs);
3554 else
3555 removeStringImpl(s&: *this, needle: qToStringViewIgnoringNull(s: str), cs);
3556 return *this;
3557}
3558
3559/*!
3560 \since 5.11
3561 \overload
3562
3563 Removes every occurrence of the given Latin-1 string viewed by \a str
3564 from this string, and returns a reference to this string.
3565
3566 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3567
3568 This is the same as \c replace(str, "", cs).
3569
3570 \include qstring.cpp shrinking-erase
3571
3572 \sa replace()
3573*/
3574QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3575{
3576 removeStringImpl(s&: *this, needle: str, cs);
3577 return *this;
3578}
3579
3580/*!
3581 \fn QString &QString::removeAt(qsizetype pos)
3582
3583 \since 6.5
3584
3585 Removes the character at index \a pos. If \a pos is out of bounds
3586 (i.e. \a pos >= size()), this function does nothing.
3587
3588 \sa remove()
3589*/
3590
3591/*!
3592 \fn QString &QString::removeFirst()
3593
3594 \since 6.5
3595
3596 Removes the first character in this string. If the string is empty,
3597 this function does nothing.
3598
3599 \sa remove()
3600*/
3601
3602/*!
3603 \fn QString &QString::removeLast()
3604
3605 \since 6.5
3606
3607 Removes the last character in this string. If the string is empty,
3608 this function does nothing.
3609
3610 \sa remove()
3611*/
3612
3613/*!
3614 Removes every occurrence of the character \a ch in this string, and
3615 returns a reference to this string.
3616
3617 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3618
3619 Example:
3620
3621 \snippet qstring/main.cpp 38
3622
3623 This is the same as \c replace(ch, "", cs).
3624
3625 \include qstring.cpp shrinking-erase
3626
3627 \sa replace()
3628*/
3629QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3630{
3631 const qsizetype idx = indexOf(c: ch, from: 0, cs);
3632 if (idx == -1)
3633 return *this;
3634
3635 const bool isCase = cs == Qt::CaseSensitive;
3636 ch = isCase ? ch : ch.toCaseFolded();
3637 auto match = [ch, isCase](QChar x) {
3638 return ch == (isCase ? x : x.toCaseFolded());
3639 };
3640
3641
3642 auto begin = d.begin();
3643 auto first_match = begin + idx;
3644 auto end = d.end();
3645 if (!d->isShared()) {
3646 auto it = std::remove_if(first: first_match, last: end, pred: match);
3647 d->erase(b: it, n: std::distance(first: it, last: end));
3648 d.data()[d.size] = u'\0';
3649 } else {
3650 // Instead of detaching, create a new string and copy all characters except for
3651 // the ones we're removing
3652 // TODO: size() is more than the needed since "copy" would be shorter
3653 QString copy{size(), Qt::Uninitialized};
3654 auto dst = copy.d.begin();
3655 auto it = std::copy(first: begin, last: first_match, result: dst); // Chunk before idx
3656 it = std::remove_copy_if(first: first_match + 1, last: end, result: it, pred: match);
3657 copy.d.size = std::distance(first: dst, last: it);
3658 copy.d.data()[copy.d.size] = u'\0';
3659 *this = std::move(copy);
3660 }
3661 return *this;
3662}
3663
3664/*!
3665 \fn QString &QString::remove(const QRegularExpression &re)
3666 \since 5.0
3667
3668 Removes every occurrence of the regular expression \a re in the
3669 string, and returns a reference to the string. For example:
3670
3671 \snippet qstring/main.cpp 96
3672
3673 \include qstring.cpp shrinking-erase
3674
3675 \sa indexOf(), lastIndexOf(), replace()
3676*/
3677
3678/*!
3679 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3680 \since 6.1
3681
3682 Removes all elements for which the predicate \a pred returns true
3683 from the string. Returns a reference to the string.
3684
3685 \sa remove()
3686*/
3687
3688
3689/*! \internal
3690 Instead of detaching, or reallocating if "before" is shorter than "after"
3691 and there isn't enough capacity, create a new string, copy characters to it
3692 as needed, then swap it with "str".
3693*/
3694static void replace_with_copy(QString &str, QSpan<size_t> indices, qsizetype blen,
3695 QStringView after)
3696{
3697 const qsizetype alen = after.size();
3698 const char16_t *after_b = after.utf16();
3699
3700 const QString::DataPointer &str_d = str.data_ptr();
3701 auto src_start = str_d.begin();
3702 const qsizetype newSize = str_d.size + indices.size() * (alen - blen);
3703 QString copy{ newSize, Qt::Uninitialized };
3704 QString::DataPointer &copy_d = copy.data_ptr();
3705 auto dst = copy_d.begin();
3706 for (size_t index : indices) {
3707 auto hit = str_d.begin() + index;
3708 dst = std::copy(first: src_start, last: hit, result: dst);
3709 dst = std::copy_n(first: after_b, n: alen, result: dst);
3710 src_start = hit + blen;
3711 }
3712 dst = std::copy(first: src_start, last: str_d.end(), result: dst);
3713 str.swap(other&: copy);
3714}
3715
3716// No detaching or reallocation is needed
3717static void replace_in_place(QString &str, QSpan<size_t> indices,
3718 qsizetype blen, QStringView after)
3719{
3720 const qsizetype alen = after.size();
3721 const char16_t *after_b = after.utf16();
3722 const char16_t *after_e = after.utf16() + after.size();
3723
3724 if (blen == alen) { // Replace in place
3725 for (size_t index : indices)
3726 std::copy_n(first: after_b, n: alen, result: str.data_ptr().begin() + index);
3727 } else if (blen > alen) { // Replace from front
3728 char16_t *begin = str.data_ptr().begin();
3729 char16_t *hit = begin + indices.front();
3730 char16_t *to = hit;
3731 to = std::copy_n(first: after_b, n: alen, result: to);
3732 char16_t *movestart = hit + blen;
3733 for (size_t index : indices.sliced(pos: 1)) {
3734 hit = begin + index;
3735 to = std::move(first: movestart, last: hit, result: to);
3736 to = std::copy_n(first: after_b, n: alen, result: to);
3737 movestart = hit + blen;
3738 }
3739 to = std::move(first: movestart, last: str.data_ptr().end(), result: to);
3740 str.resize(size: std::distance(first: begin, last: to));
3741 } else { // blen < alen, Replace from back
3742 const qsizetype oldSize = str.data_ptr().size;
3743 const qsizetype adjust = indices.size() * (alen - blen);
3744 const qsizetype newSize = oldSize + adjust;
3745
3746 str.resize(size: newSize);
3747 char16_t *begin = str.data_ptr().begin();
3748 char16_t *moveend = begin + oldSize;
3749 char16_t *to = str.data_ptr().end();
3750
3751 for (auto it = indices.rbegin(), end = indices.rend(); it != end; ++it) {
3752 char16_t *hit = begin + *it;
3753 char16_t *movestart = hit + blen;
3754 to = std::move_backward(first: movestart, last: moveend, result: to);
3755 to = std::copy_backward(first: after_b, last: after_e, result: to);
3756 moveend = hit;
3757 }
3758 }
3759}
3760
3761static void replace_helper(QString &str, QSpan<size_t> indices, qsizetype blen, QStringView after)
3762{
3763 const qsizetype oldSize = str.data_ptr().size;
3764 const qsizetype adjust = indices.size() * (after.size() - blen);
3765 const qsizetype newSize = oldSize + adjust;
3766 if (str.data_ptr().needsDetach()) {
3767 replace_with_copy(str, indices, blen, after);
3768 return;
3769 }
3770
3771 str.reserve(asize: newSize);
3772
3773 if (QtPrivate::q_points_into_range(p: after.begin(), c: str))
3774 // Copy after if it lies inside our own d.b area (which we could
3775 // possibly invalidate via a realloc or modify by replacement)
3776 replace_in_place(str, indices, blen, after: QVarLengthArray(after.begin(), after.end()));
3777 else
3778 replace_in_place(str, indices, blen, after);
3779}
3780
3781/*!
3782 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3783
3784 Replaces \a n characters beginning at index \a position with
3785 the string \a after and returns a reference to this string.
3786
    \note If the specified \a position index is within the string,
    but \a position + \a n goes outside the string's range,
    then \a n will be adjusted to stop at the end of the string.
3790
3791 Example:
3792
3793 \snippet qstring/main.cpp 40
3794
3795 \sa insert(), remove()
3796*/
3797QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3798{
3799 return replace(i: pos, len, s: after.constData(), slen: after.size());
3800}
3801
3802/*!
3803 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3804 \overload replace()
3805 Replaces \a n characters beginning at index \a position with the
3806 first \a alen characters of the QChar array \a after and returns a
3807 reference to this string.
3808*/
3809QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3810{
3811 if (size_t(pos) > size_t(this->size()))
3812 return *this;
3813 if (len > this->size() - pos)
3814 len = this->size() - pos;
3815
3816 size_t index = pos;
3817 replace_helper(str&: *this, indices: QSpan(&index, 1), blen: len, after: QStringView{after, alen});
3818 return *this;
3819}
3820
3821/*!
3822 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3823 \overload replace()
3824
3825 Replaces \a n characters beginning at index \a position with the
3826 character \a after and returns a reference to this string.
3827*/
3828QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3829{
3830 return replace(pos, len, after: &after, alen: 1);
3831}
3832
3833/*!
3834 \overload replace()
3835 Replaces every occurrence of the string \a before with the string \a
3836 after and returns a reference to this string.
3837
3838 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3839
3840 Example:
3841
3842 \snippet qstring/main.cpp 41
3843
3844 \note The replacement text is not rescanned after it is inserted.
3845
3846 Example:
3847
3848 \snippet qstring/main.cpp 86
3849
3850//! [empty-before-arg-in-replace]
3851 \note If you use an empty \a before argument, the \a after argument will be
3852 inserted \e {before and after} each character of the string.
3853//! [empty-before-arg-in-replace]
3854
3855*/
3856QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3857{
3858 return replace(before: before.constData(), blen: before.size(), after: after.constData(), alen: after.size(), cs);
3859}
3860
3861/*!
3862 \since 4.5
3863 \overload replace()
3864
3865 Replaces each occurrence in this string of the first \a blen
3866 characters of \a before with the first \a alen characters of \a
3867 after and returns a reference to this string.
3868
3869 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3870
3871 \note If \a before points to an \e empty string (that is, \a blen == 0),
3872 the string pointed to by \a after will be inserted \e {before and after}
3873 each character in this string.
3874*/
3875QString &QString::replace(const QChar *before, qsizetype blen,
3876 const QChar *after, qsizetype alen,
3877 Qt::CaseSensitivity cs)
3878{
3879 if (d.size == 0) {
3880 if (blen)
3881 return *this;
3882 } else {
3883 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3884 return *this;
3885 }
3886 if (alen == 0 && blen == 0)
3887 return *this;
3888 if (alen == 1 && blen == 1)
3889 return replace(before: *before, after: *after, cs);
3890
3891 QStringMatcher matcher(before, blen, cs);
3892
3893 qsizetype index = 0;
3894
3895 QVarLengthArray<size_t> indices;
3896 while ((index = matcher.indexIn(str: *this, from: index)) != -1) {
3897 indices.push_back(t: index);
3898 if (blen) // Step over before:
3899 index += blen;
3900 else // Only count one instance of empty between any two characters:
3901 index++;
3902 }
3903 if (indices.isEmpty())
3904 return *this;
3905
3906 replace_helper(str&: *this, indices, blen, after: QStringView{after, alen});
3907 return *this;
3908}
3909
3910/*!
3911 \overload replace()
3912 Replaces every occurrence of the character \a ch in the string with
3913 \a after and returns a reference to this string.
3914
3915 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3916*/
3917QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3918{
3919 if (after.size() == 0)
3920 return remove(ch, cs);
3921
3922 if (after.size() == 1)
3923 return replace(before: ch, after: after.front(), cs);
3924
3925 if (size() == 0)
3926 return *this;
3927
3928 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3929
3930 QVarLengthArray<size_t> indices;
3931 if (cs == Qt::CaseSensitive) {
3932 const char16_t *begin = d.begin();
3933 const char16_t *end = d.end();
3934 QStringView view(begin, end);
3935 const char16_t *hit = nullptr;
3936 while ((hit = QtPrivate::qustrchr(str: view, c: cc)) != end) {
3937 indices.push_back(t: std::distance(first: begin, last: hit));
3938 view = QStringView(std::next(x: hit), end);
3939 }
3940 } else {
3941 for (qsizetype i = 0; i < d.size; ++i)
3942 if (QChar::toCaseFolded(ucs4: d.data()[i]) == cc)
3943 indices.push_back(t: i);
3944 }
3945 if (indices.isEmpty())
3946 return *this;
3947
3948 replace_helper(str&: *this, indices, blen: 1, after);
3949 return *this;
3950}
3951
3952/*!
3953 \overload replace()
3954 Replaces every occurrence of the character \a before with the
3955 character \a after and returns a reference to this string.
3956
3957 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3958*/
3959QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3960{
3961 const qsizetype idx = indexOf(c: before, from: 0, cs);
3962 if (idx == -1)
3963 return *this;
3964
3965 const char16_t achar = after.unicode();
3966 char16_t bchar = before.unicode();
3967
3968 auto matchesCIS = [](char16_t beforeChar) {
3969 return [beforeChar](char16_t ch) { return foldAndCompare(a: ch, b: beforeChar); };
3970 };
3971
3972 auto hit = d.begin() + idx;
3973 if (!d.needsDetach()) {
3974 *hit++ = achar;
3975 if (cs == Qt::CaseSensitive) {
3976 std::replace(first: hit, last: d.end(), old_value: bchar, new_value: achar);
3977 } else {
3978 bchar = foldCase(ch: bchar);
3979 std::replace_if(first: hit, last: d.end(), pred: matchesCIS(bchar), new_value: achar);
3980 }
3981 } else {
3982 QString other{ d.size, Qt::Uninitialized };
3983 auto dest = std::copy(first: d.begin(), last: hit, result: other.d.begin());
3984 *dest++ = achar;
3985 ++hit;
3986 if (cs == Qt::CaseSensitive) {
3987 std::replace_copy(first: hit, last: d.end(), result: dest, old_value: bchar, new_value: achar);
3988 } else {
3989 bchar = foldCase(ch: bchar);
3990 std::replace_copy_if(first: hit, last: d.end(), result: dest, pred: matchesCIS(bchar), new_value: achar);
3991 }
3992
3993 swap(other);
3994 }
3995 return *this;
3996}
3997
3998/*!
3999 \since 4.5
4000 \overload replace()
4001
4002 Replaces every occurrence in this string of the Latin-1 string viewed
4003 by \a before with the Latin-1 string viewed by \a after, and returns a
4004 reference to this string.
4005
4006 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4007
4008 \note The text is not rescanned after a replacement.
4009
4010 \include qstring.cpp empty-before-arg-in-replace
4011*/
4012QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
4013{
4014 const qsizetype alen = after.size();
4015 const qsizetype blen = before.size();
4016 if (blen == 1 && alen == 1)
4017 return replace(before: before.front(), after: after.front(), cs);
4018
4019 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4020 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(str: before);
4021 return replace(before: (const QChar *)b.data(), blen, after: (const QChar *)a.data(), alen, cs);
4022}
4023
4024/*!
4025 \since 4.5
4026 \overload replace()
4027
4028 Replaces every occurrence in this string of the Latin-1 string viewed
4029 by \a before with the string \a after, and returns a reference to this
4030 string.
4031
4032 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4033
4034 \note The text is not rescanned after a replacement.
4035
4036 \include qstring.cpp empty-before-arg-in-replace
4037*/
4038QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
4039{
4040 const qsizetype blen = before.size();
4041 if (blen == 1 && after.size() == 1)
4042 return replace(before: before.front(), after: after.front(), cs);
4043
4044 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(str: before);
4045 return replace(before: (const QChar *)b.data(), blen, after: after.constData(), alen: after.d.size, cs);
4046}
4047
4048/*!
4049 \since 4.5
4050 \overload replace()
4051
4052 Replaces every occurrence of the string \a before with the string \a
4053 after and returns a reference to this string.
4054
4055 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4056
4057 \note The text is not rescanned after a replacement.
4058
4059 \include qstring.cpp empty-before-arg-in-replace
4060*/
4061QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
4062{
4063 const qsizetype alen = after.size();
4064 if (before.size() == 1 && alen == 1)
4065 return replace(before: before.front(), after: after.front(), cs);
4066
4067 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4068 return replace(before: before.constData(), blen: before.d.size, after: (const QChar *)a.data(), alen, cs);
4069}
4070
4071/*!
4072 \since 4.5
4073 \overload replace()
4074
4075 Replaces every occurrence of the character \a c with the string \a
4076 after and returns a reference to this string.
4077
4078 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4079
4080 \note The text is not rescanned after a replacement.
4081*/
4082QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4083{
4084 const qsizetype alen = after.size();
4085 if (alen == 1)
4086 return replace(before: c, after: after.front(), cs);
4087
4088 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(str: after);
4089 return replace(before: &c, blen: 1, after: (const QChar *)a.data(), alen, cs);
4090}
4091
4092/*!
4093 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4094 \overload operator==()
4095
4096 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4097 returns \c false.
4098
4099 \include qstring.cpp compare-isNull-vs-isEmpty
4100
4101 \sa {Comparing Strings}
4102*/
4103
4104/*!
4105 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4106
4107 \overload operator==()
4108
4109 Returns \c true if \a lhs is equal to \a rhs; otherwise
4110 returns \c false.
4111*/
4112
4113/*!
4114 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4115
4116 \overload operator==()
4117
4118 Returns \c true if \a lhs is equal to \a rhs; otherwise
4119 returns \c false.
4120*/
4121
4122/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4123
4124 \overload operator==()
4125
4126 The \a rhs byte array is converted to a QUtf8StringView.
4127
4128 You can disable this operator by defining
4129 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4130 can be useful if you want to ensure that all user-visible strings
4131 go through QObject::tr(), for example.
4132
4133 Returns \c true if string \a lhs is lexically equal to \a rhs.
4134 Otherwise returns \c false.
4135*/
4136
4137/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4138
4139 \overload operator==()
4140
4141 The \a rhs const char pointer is converted to a QUtf8StringView.
4142
4143 You can disable this operator by defining
4144 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4145 can be useful if you want to ensure that all user-visible strings
4146 go through QObject::tr(), for example.
4147*/
4148
4149/*!
4150 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4151
4152 \overload operator<()
4153
4154 Returns \c true if string \a lhs is lexically less than string
4155 \a rhs; otherwise returns \c false.
4156
4157 \sa {Comparing Strings}
4158*/
4159
4160/*!
4161 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4162
4163 \overload operator<()
4164
4165 Returns \c true if \a lhs is lexically less than \a rhs;
4166 otherwise returns \c false.
4167*/
4168
4169/*!
4170 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4171
4172 \overload operator<()
4173
4174 Returns \c true if \a lhs is lexically less than \a rhs;
4175 otherwise returns \c false.
4176*/
4177
4178/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4179
4180 \overload operator<()
4181
4182 The \a rhs byte array is converted to a QUtf8StringView.
4183 If any NUL characters ('\\0') are embedded in the byte array, they will be
4184 included in the transformation.
4185
    You can disable this operator by defining
    \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4188 can be useful if you want to ensure that all user-visible strings
4189 go through QObject::tr(), for example.
4190*/
4191
4192/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4193
4194 Returns \c true if string \a lhs is lexically less than string \a rhs.
4195 Otherwise returns \c false.
4196
4197 \overload operator<()
4198
4199 The \a rhs const char pointer is converted to a QUtf8StringView.
4200
4201 You can disable this operator by defining
4202 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4203 can be useful if you want to ensure that all user-visible strings
4204 go through QObject::tr(), for example.
4205*/
4206
4207/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4208
4209 Returns \c true if string \a lhs is lexically less than or equal to
4210 string \a rhs; otherwise returns \c false.
4211
4212 \sa {Comparing Strings}
4213*/
4214
4215/*!
4216 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4217
4218 \overload operator<=()
4219
4220 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4221 otherwise returns \c false.
4222*/
4223
4224/*!
4225 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4226
4227 \overload operator<=()
4228
4229 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4230 otherwise returns \c false.
4231*/
4232
4233/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4234
4235 \overload operator<=()
4236
4237 The \a rhs byte array is converted to a QUtf8StringView.
4238 If any NUL characters ('\\0') are embedded in the byte array, they will be
4239 included in the transformation.
4240
4241 You can disable this operator by defining
4242 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4243 can be useful if you want to ensure that all user-visible strings
4244 go through QObject::tr(), for example.
4245*/
4246
4247/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4248
4249 \overload operator<=()
4250
4251 The \a rhs const char pointer is converted to a QUtf8StringView.
4252
4253 You can disable this operator by defining
4254 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4255 can be useful if you want to ensure that all user-visible strings
4256 go through QObject::tr(), for example.
4257*/
4258
4259/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4260
4261 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4262 otherwise returns \c false.
4263
4264 \sa {Comparing Strings}
4265*/
4266
4267/*!
4268 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4269
4270 \overload operator>()
4271
4272 Returns \c true if \a lhs is lexically greater than \a rhs;
4273 otherwise returns \c false.
4274*/
4275
4276/*!
4277 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4278
4279 \overload operator>()
4280
4281 Returns \c true if \a lhs is lexically greater than \a rhs;
4282 otherwise returns \c false.
4283*/
4284
4285/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4286
4287 \overload operator>()
4288
4289 The \a rhs byte array is converted to a QUtf8StringView.
4290 If any NUL characters ('\\0') are embedded in the byte array, they will be
4291 included in the transformation.
4292
4293 You can disable this operator by defining
4294 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4295 can be useful if you want to ensure that all user-visible strings
4296 go through QObject::tr(), for example.
4297*/
4298
4299/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4300
4301 \overload operator>()
4302
4303 The \a rhs const char pointer is converted to a QUtf8StringView.
4304
4305 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4306 when you compile your applications. This can be useful if you want
4307 to ensure that all user-visible strings go through QObject::tr(),
4308 for example.
4309*/
4310
4311/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4312
4313 Returns \c true if string \a lhs is lexically greater than or equal to
4314 string \a rhs; otherwise returns \c false.
4315
4316 \sa {Comparing Strings}
4317*/
4318
4319/*!
4320 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4321
4322 \overload operator>=()
4323
4324 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4325 otherwise returns \c false.
4326*/
4327
4328/*!
4329 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4330
4331 \overload operator>=()
4332
4333 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4334 otherwise returns \c false.
4335*/
4336
4337/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4338
4339 \overload operator>=()
4340
4341 The \a rhs byte array is converted to a QUtf8StringView.
4342 If any NUL characters ('\\0') are embedded in the byte array, they will be
4343 included in the transformation.
4344
4345 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4346 when you compile your applications. This can be useful if you want
4347 to ensure that all user-visible strings go through QObject::tr(),
4348 for example.
4349*/
4350
4351/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4352
4353 \overload operator>=()
4354
4355 The \a rhs const char pointer is converted to a QUtf8StringView.
4356
4357 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4358 when you compile your applications. This can be useful if you want
4359 to ensure that all user-visible strings go through QObject::tr(),
4360 for example.
4361*/
4362
4363/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4364
4365 Returns \c true if string \a lhs is not equal to string \a rhs;
4366 otherwise returns \c false.
4367
4368 \sa {Comparing Strings}
4369*/
4370
4371/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4372
4373 Returns \c true if string \a lhs is not equal to string \a rhs.
4374 Otherwise returns \c false.
4375
4376 \overload operator!=()
4377*/
4378
4379/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4380
4381 \overload operator!=()
4382
4383 The \a rhs byte array is converted to a QUtf8StringView.
4384 If any NUL characters ('\\0') are embedded in the byte array, they will be
4385 included in the transformation.
4386
4387 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4388 when you compile your applications. This can be useful if you want
4389 to ensure that all user-visible strings go through QObject::tr(),
4390 for example.
4391*/
4392
4393/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4394
4395 \overload operator!=()
4396
4397 The \a rhs const char pointer is converted to a QUtf8StringView.
4398
4399 You can disable this operator by defining
4400 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4401 can be useful if you want to ensure that all user-visible strings
4402 go through QObject::tr(), for example.
4403*/
4404
4405/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4406
4407 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4408 \a rhs; otherwise returns \c false.
4409
4410 The comparison is case sensitive.
4411
4412 You can disable this operator by defining \c
4413 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4414 then need to call QString::fromUtf8(), QString::fromLatin1(),
4415 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4416 array to a QString before doing the comparison.
4417*/
4418
4419/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4420
4421 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4422 \a rhs; otherwise returns \c false.
4423
4424 The comparison is case sensitive.
4425
4426 You can disable this operator by defining \c
4427 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4428 then need to call QString::fromUtf8(), QString::fromLatin1(),
4429 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4430 array to a QString before doing the comparison.
4431*/
4432
4433/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4434
4435 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4436 of \a rhs; otherwise returns \c false.
4437
4438 The comparison is case sensitive.
4439
4440 You can disable this operator by defining \c
4441 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4442 then need to call QString::fromUtf8(), QString::fromLatin1(),
4443 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4444 array to a QString before doing the comparison.
4445*/
4446
4447/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4448
4449 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4450 encoding of \a rhs; otherwise returns \c false.
4451
4452 The comparison is case sensitive.
4453
4454 You can disable this operator by defining \c
4455 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4456 then need to call QString::fromUtf8(), QString::fromLatin1(),
4457 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4458 array to a QString before doing the comparison.
4459*/
4460
4461/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4462
4463 Returns \c true if byte array \a lhs is lexically less than or equal to the
4464 UTF-8 encoding of \a rhs; otherwise returns \c false.
4465
4466 The comparison is case sensitive.
4467
4468 You can disable this operator by defining \c
4469 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4470 then need to call QString::fromUtf8(), QString::fromLatin1(),
4471 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4472 array to a QString before doing the comparison.
4473*/
4474
4475/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4476
    Returns \c true if byte array \a lhs is lexically greater than or equal to
    the UTF-8 encoding of \a rhs; otherwise returns \c false.
4479
4480 The comparison is case sensitive.
4481
4482 You can disable this operator by defining \c
4483 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4484 then need to call QString::fromUtf8(), QString::fromLatin1(),
4485 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4486 array to a QString before doing the comparison.
4487*/
4488
4489/*!
4490 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4491
4492 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4493
4494 Example:
4495
4496 \snippet qstring/main.cpp 24
4497
4498 \include qstring.qdocinc negative-index-start-search-from-end
4499
4500 \sa lastIndexOf(), contains(), count()
4501*/
4502qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4503{
4504 return QtPrivate::findString(haystack: QStringView(unicode(), size()), from, needle: QStringView(str.unicode(), str.size()), cs);
4505}
4506
4507/*!
4508 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4509 \since 5.14
4510 \overload indexOf()
4511
4512 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4513
4514 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4515
4516 \include qstring.qdocinc negative-index-start-search-from-end
4517
4518 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4519*/
4520
4521/*!
4522 \since 4.5
4523
4524 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4525
4526 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4527
4528 Example:
4529
4530 \snippet qstring/main.cpp 24
4531
4532 \include qstring.qdocinc negative-index-start-search-from-end
4533
4534 \sa lastIndexOf(), contains(), count()
4535*/
4536
4537qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4538{
4539 return QtPrivate::findString(haystack: QStringView(unicode(), size()), from, needle: str, cs);
4540}
4541
4542/*!
4543 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4544 \overload indexOf()
4545
4546 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4547*/
4548
4549/*!
4550 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4551
4552 \include qstring.qdocinc negative-index-start-search-from-end
4553
4554 Returns -1 if \a str is not found.
4555
4556 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4557
4558 Example:
4559
4560 \snippet qstring/main.cpp 29
4561
4562 \note When searching for a 0-length \a str, the match at the end of
4563 the data is excluded from the search by a negative \a from, even
4564 though \c{-1} is normally thought of as searching from the end of the
4565 string: the match at the end is \e after the last character, so it is
4566 excluded. To include such a final empty match, either give a positive
4567 value for \a from or omit the \a from parameter entirely.
4568
4569 \sa indexOf(), contains(), count()
4570*/
4571qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4572{
4573 return QtPrivate::lastIndexOf(haystack: QStringView(*this), from, needle: str, cs);
4574}
4575
4576/*!
4577 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4578 \since 6.2
4579 \overload lastIndexOf()
4580
4581 Returns the index position of the last occurrence of the string \a
4582 str in this string. Returns -1 if \a str is not found.
4583
4584 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4585
4586 Example:
4587
4588 \snippet qstring/main.cpp 29
4589
4590 \sa indexOf(), contains(), count()
4591*/
4592
4593
4594/*!
4595 \since 4.5
4596 \overload lastIndexOf()
4597
4598 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4599
4600 \include qstring.qdocinc negative-index-start-search-from-end
4601
4602 Returns -1 if \a str is not found.
4603
4604 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4605
4606 Example:
4607
4608 \snippet qstring/main.cpp 29
4609
4610 \note When searching for a 0-length \a str, the match at the end of
4611 the data is excluded from the search by a negative \a from, even
4612 though \c{-1} is normally thought of as searching from the end of the
4613 string: the match at the end is \e after the last character, so it is
4614 excluded. To include such a final empty match, either give a positive
4615 value for \a from or omit the \a from parameter entirely.
4616
4617 \sa indexOf(), contains(), count()
4618*/
4619qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4620{
4621 return QtPrivate::lastIndexOf(haystack: *this, from, needle: str, cs);
4622}
4623
4624/*!
4625 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4626 \since 6.2
4627 \overload lastIndexOf()
4628
4629 Returns the index position of the last occurrence of the string \a
4630 str in this string. Returns -1 if \a str is not found.
4631
4632 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4633
4634 Example:
4635
4636 \snippet qstring/main.cpp 29
4637
4638 \sa indexOf(), contains(), count()
4639*/
4640
4641/*!
4642 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4643 \overload lastIndexOf()
4644
4645 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4646*/
4647
4648/*!
4649 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4650 \since 6.3
4651 \overload lastIndexOf()
4652*/
4653
4654/*!
4655 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4656 \since 5.14
4657 \overload lastIndexOf()
4658
4659 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4660
4661 \include qstring.qdocinc negative-index-start-search-from-end
4662
4663 Returns -1 if \a str is not found.
4664
4665 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4666
4667 \note When searching for a 0-length \a str, the match at the end of
4668 the data is excluded from the search by a negative \a from, even
4669 though \c{-1} is normally thought of as searching from the end of the
4670 string: the match at the end is \e after the last character, so it is
4671 excluded. To include such a final empty match, either give a positive
4672 value for \a from or omit the \a from parameter entirely.
4673
4674 \sa indexOf(), contains(), count()
4675*/
4676
4677/*!
4678 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4679 \since 6.2
4680 \overload lastIndexOf()
4681
4682 Returns the index position of the last occurrence of the string view \a
4683 str in this string. Returns -1 if \a str is not found.
4684
4685 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4686
4687 \sa indexOf(), contains(), count()
4688*/
4689
4690#if QT_CONFIG(regularexpression)
4691struct QStringCapture
4692{
4693 qsizetype pos;
4694 qsizetype len;
4695 int no;
4696};
4697Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4698
4699/*!
4700 \overload replace()
4701 \since 5.0
4702
4703 Replaces every occurrence of the regular expression \a re in the
4704 string with \a after. Returns a reference to the string. For
4705 example:
4706
4707 \snippet qstring/main.cpp 87
4708
4709 For regular expressions containing capturing groups,
4710 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4711 with the string captured by the corresponding capturing group.
4712
4713 \snippet qstring/main.cpp 88
4714
4715 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4716*/
4717QString &QString::replace(const QRegularExpression &re, const QString &after)
4718{
4719 if (!re.isValid()) {
4720 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::replace");
4721 return *this;
4722 }
4723
4724 const QString copy(*this);
4725 QRegularExpressionMatchIterator iterator = re.globalMatch(subject: copy);
4726 if (!iterator.hasNext()) // no matches at all
4727 return *this;
4728
4729 reallocData(alloc: d.size, option: QArrayData::KeepSize);
4730
4731 qsizetype numCaptures = re.captureCount();
4732
4733 // 1. build the backreferences list, holding where the backreferences
4734 // are in the replacement string
4735 QVarLengthArray<QStringCapture> backReferences;
4736 const qsizetype al = after.size();
4737 const QChar *ac = after.unicode();
4738
4739 for (qsizetype i = 0; i < al - 1; i++) {
4740 if (ac[i] == u'\\') {
4741 int no = ac[i + 1].digitValue();
4742 if (no > 0 && no <= numCaptures) {
4743 QStringCapture backReference;
4744 backReference.pos = i;
4745 backReference.len = 2;
4746
4747 if (i < al - 2) {
4748 int secondDigit = ac[i + 2].digitValue();
4749 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4750 no = (no * 10) + secondDigit;
4751 ++backReference.len;
4752 }
4753 }
4754
4755 backReference.no = no;
4756 backReferences.append(t: backReference);
4757 }
4758 }
4759 }
4760
4761 // 2. iterate on the matches. For every match, copy in chunks
4762 // - the part before the match
4763 // - the after string, with the proper replacements for the backreferences
4764
4765 qsizetype newLength = 0; // length of the new string, with all the replacements
4766 qsizetype lastEnd = 0;
4767 QVarLengthArray<QStringView> chunks;
4768 const QStringView copyView{ copy }, afterView{ after };
4769 while (iterator.hasNext()) {
4770 QRegularExpressionMatch match = iterator.next();
4771 qsizetype len;
4772 // add the part before the match
4773 len = match.capturedStart() - lastEnd;
4774 if (len > 0) {
4775 chunks << copyView.mid(pos: lastEnd, n: len);
4776 newLength += len;
4777 }
4778
4779 lastEnd = 0;
4780 // add the after string, with replacements for the backreferences
4781 for (const QStringCapture &backReference : std::as_const(t&: backReferences)) {
4782 // part of "after" before the backreference
4783 len = backReference.pos - lastEnd;
4784 if (len > 0) {
4785 chunks << afterView.mid(pos: lastEnd, n: len);
4786 newLength += len;
4787 }
4788
4789 // backreference itself
4790 len = match.capturedLength(nth: backReference.no);
4791 if (len > 0) {
4792 chunks << copyView.mid(pos: match.capturedStart(nth: backReference.no), n: len);
4793 newLength += len;
4794 }
4795
4796 lastEnd = backReference.pos + backReference.len;
4797 }
4798
4799 // add the last part of the after string
4800 len = afterView.size() - lastEnd;
4801 if (len > 0) {
4802 chunks << afterView.mid(pos: lastEnd, n: len);
4803 newLength += len;
4804 }
4805
4806 lastEnd = match.capturedEnd();
4807 }
4808
4809 // 3. trailing string after the last match
4810 if (copyView.size() > lastEnd) {
4811 chunks << copyView.mid(pos: lastEnd);
4812 newLength += copyView.size() - lastEnd;
4813 }
4814
4815 // 4. assemble the chunks together
4816 resize(size: newLength);
4817 qsizetype i = 0;
4818 QChar *uc = data();
4819 for (const QStringView &chunk : std::as_const(t&: chunks)) {
4820 qsizetype len = chunk.size();
4821 memcpy(dest: uc + i, src: chunk.constData(), n: len * sizeof(QChar));
4822 i += len;
4823 }
4824
4825 return *this;
4826}
4827#endif // QT_CONFIG(regularexpression)
4828
4829/*!
4830 Returns the number of (potentially overlapping) occurrences of
4831 the string \a str in this string.
4832
4833 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4834
4835 \sa contains(), indexOf()
4836*/
4837
4838qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4839{
4840 return QtPrivate::count(haystack: QStringView(unicode(), size()), needle: QStringView(str.unicode(), str.size()), cs);
4841}
4842
4843/*!
4844 \overload count()
4845
4846 Returns the number of occurrences of character \a ch in the string.
4847
4848 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4849
4850 \sa contains(), indexOf()
4851*/
4852
4853qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4854{
4855 return QtPrivate::count(haystack: QStringView(unicode(), size()), needle: ch, cs);
4856}
4857
4858/*!
4859 \since 6.0
4860 \overload count()
4861 Returns the number of (potentially overlapping) occurrences of the
4862 string view \a str in this string.
4863
4864 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4865
4866 \sa contains(), indexOf()
4867*/
4868qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4869{
4870 return QtPrivate::count(haystack: *this, needle: str, cs);
4871}
4872
4873/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4874
4875 Returns \c true if this string contains an occurrence of the string
4876 \a str; otherwise returns \c false.
4877
4878 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4879
4880 Example:
4881 \snippet qstring/main.cpp 17
4882
4883 \sa indexOf(), count()
4884*/
4885
4886/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4887 \since 5.3
4888
4889 \overload contains()
4890
4891 Returns \c true if this string contains an occurrence of the Latin-1 string
4892 \a str; otherwise returns \c false.
4893*/
4894
4895/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4896
4897 \overload contains()
4898
4899 Returns \c true if this string contains an occurrence of the
4900 character \a ch; otherwise returns \c false.
4901*/
4902
4903/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4904 \since 5.14
4905 \overload contains()
4906
4907 Returns \c true if this string contains an occurrence of the string view
4908 \a str; otherwise returns \c false.
4909
4910 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4911
4912 \sa indexOf(), count()
4913*/
4914
4915#if QT_CONFIG(regularexpression)
4916/*!
4917 \since 5.5
4918
4919 Returns the index position of the first match of the regular
4920 expression \a re in the string, searching forward from index
4921 position \a from. Returns -1 if \a re didn't match anywhere.
4922
4923 If the match is successful and \a rmatch is not \nullptr, it also
4924 writes the results of the match into the QRegularExpressionMatch object
4925 pointed to by \a rmatch.
4926
4927 Example:
4928
4929 \snippet qstring/main.cpp 93
4930*/
4931qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4932{
4933 return QtPrivate::indexOf(viewHaystack: QStringView(*this), stringHaystack: this, re, from, rmatch);
4934}
4935
4936/*!
4937 \since 5.5
4938
4939 Returns the index position of the last match of the regular
4940 expression \a re in the string, which starts before the index
4941 position \a from.
4942
4943 \include qstring.qdocinc negative-index-start-search-from-end
4944
4945 Returns -1 if \a re didn't match anywhere.
4946
4947 If the match is successful and \a rmatch is not \nullptr, it also
4948 writes the results of the match into the QRegularExpressionMatch object
4949 pointed to by \a rmatch.
4950
4951 Example:
4952
4953 \snippet qstring/main.cpp 94
4954
4955 \note Due to how the regular expression matching algorithm works,
4956 this function will actually match repeatedly from the beginning of
4957 the string until the position \a from is reached.
4958
4959 \note When searching for a regular expression \a re that may match
4960 0 characters, the match at the end of the data is excluded from the
4961 search by a negative \a from, even though \c{-1} is normally
4962 thought of as searching from the end of the string: the match at
4963 the end is \e after the last character, so it is excluded. To
4964 include such a final empty match, either give a positive value for
4965 \a from or omit the \a from parameter entirely.
4966*/
4967qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4968{
4969 return QtPrivate::lastIndexOf(viewHaystack: QStringView(*this), stringHaystack: this, re, from, rmatch);
4970}
4971
4972/*!
4973 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4974 \since 6.2
4975 \overload lastIndexOf()
4976
4977 Returns the index position of the last match of the regular
4978 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4979
4980 If the match is successful and \a rmatch is not \nullptr, it also
4981 writes the results of the match into the QRegularExpressionMatch object
4982 pointed to by \a rmatch.
4983
4984 Example:
4985
4986 \snippet qstring/main.cpp 94
4987
4988 \note Due to how the regular expression matching algorithm works,
4989 this function will actually match repeatedly from the beginning of
4990 the string until the end of the string is reached.
4991*/
4992
4993/*!
4994 \since 5.1
4995
4996 Returns \c true if the regular expression \a re matches somewhere in this
4997 string; otherwise returns \c false.
4998
4999 If the match is successful and \a rmatch is not \nullptr, it also
5000 writes the results of the match into the QRegularExpressionMatch object
5001 pointed to by \a rmatch.
5002
5003 \sa QRegularExpression::match()
5004*/
5005
5006bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
5007{
5008 return QtPrivate::contains(viewHaystack: QStringView(*this), stringHaystack: this, re, rmatch);
5009}
5010
5011/*!
5012 \overload count()
5013 \since 5.0
5014
5015 Returns the number of times the regular expression \a re matches
5016 in the string.
5017
5018 For historical reasons, this function counts overlapping matches,
5019 so in the example below, there are four instances of "ana" or
5020 "ama":
5021
5022 \snippet qstring/main.cpp 95
5023
5024 This behavior is different from simply iterating over the matches
5025 in the string using QRegularExpressionMatchIterator.
5026
5027 \sa QRegularExpression::globalMatch()
5028*/
5029qsizetype QString::count(const QRegularExpression &re) const
5030{
5031 return QtPrivate::count(haystack: QStringView(*this), re);
5032}
5033#endif // QT_CONFIG(regularexpression)
5034
5035#if QT_DEPRECATED_SINCE(6, 4)
5036/*! \fn qsizetype QString::count() const
5037 \deprecated [6.4] Use size() or length() instead.
5038 \overload count()
5039
5040 Same as size().
5041*/
5042#endif
5043
5044/*!
5045 \enum QString::SectionFlag
5046
5047 This enum specifies flags that can be used to affect various
5048 aspects of the section() function's behavior with respect to
5049 separators and empty fields.
5050
5051 \value SectionDefault Empty fields are counted, leading and
5052 trailing separators are not included, and the separator is
5053 compared case sensitively.
5054
5055 \value SectionSkipEmpty Treat empty fields as if they don't exist,
5056 i.e. they are not considered as far as \e start and \e end are
5057 concerned.
5058
5059 \value SectionIncludeLeadingSep Include the leading separator (if
5060 any) in the result string.
5061
5062 \value SectionIncludeTrailingSep Include the trailing separator
5063 (if any) in the result string.
5064
5065 \value SectionCaseInsensitiveSeps Compare the separator
5066 case-insensitively.
5067
5068 \sa section()
5069*/
5070
5071/*!
5072 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5073
5074 This function returns a section of the string.
5075
5076 This string is treated as a sequence of fields separated by the
5077 character, \a sep. The returned string consists of the fields from
5078 position \a start to position \a end inclusive. If \a end is not
5079 specified, all fields from position \a start to the end of the
5080 string are included. Fields are numbered 0, 1, 2, etc., counting
5081 from the left, and -1, -2, etc., counting from right to left.
5082
5083 The \a flags argument can be used to affect some aspects of the
5084 function's behavior, e.g. whether to be case sensitive, whether
5085 to skip empty fields and how to deal with leading and trailing
5086 separators; see \l{SectionFlags}.
5087
5088 \snippet qstring/main.cpp 52
5089
5090 If \a start or \a end is negative, we count fields from the right
5091 of the string, the right-most field being -1, the field before it
5092 being -2, and so on.
5093
5094 \snippet qstring/main.cpp 53
5095
5096 \sa split()
5097*/
5098
5099/*!
5100 \overload section()
5101
5102 \snippet qstring/main.cpp 51
5103 \snippet qstring/main.cpp 54
5104
5105 \sa split()
5106*/
5107
5108QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5109{
5110 const QList<QStringView> sections = QStringView{ *this }.split(
5111 sep, behavior: Qt::KeepEmptyParts, cs: (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5112 const qsizetype sectionsSize = sections.size();
5113 if (!(flags & SectionSkipEmpty)) {
5114 if (start < 0)
5115 start += sectionsSize;
5116 if (end < 0)
5117 end += sectionsSize;
5118 } else {
5119 qsizetype skip = 0;
5120 for (qsizetype k = 0; k < sectionsSize; ++k) {
5121 if (sections.at(i: k).isEmpty())
5122 skip++;
5123 }
5124 if (start < 0)
5125 start += sectionsSize - skip;
5126 if (end < 0)
5127 end += sectionsSize - skip;
5128 }
5129 if (start >= sectionsSize || end < 0 || start > end)
5130 return QString();
5131
5132 QString ret;
5133 qsizetype first_i = start, last_i = end;
5134 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5135 const QStringView &section = sections.at(i);
5136 const bool empty = section.isEmpty();
5137 if (x >= start) {
5138 if (x == start)
5139 first_i = i;
5140 if (x == end)
5141 last_i = i;
5142 if (x > start && i > 0)
5143 ret += sep;
5144 ret += section;
5145 }
5146 if (!empty || !(flags & SectionSkipEmpty))
5147 x++;
5148 }
5149 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5150 ret.prepend(s: sep);
5151 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5152 ret += sep;
5153 return ret;
5154}
5155
5156#if QT_CONFIG(regularexpression)
5157class qt_section_chunk {
5158public:
5159 qt_section_chunk() {}
5160 qt_section_chunk(qsizetype l, QStringView s) : length(l), string(std::move(s)) {}
5161 qsizetype length;
5162 QStringView string;
5163};
5164Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5165
5166static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5167 QString::SectionFlags flags)
5168{
5169 const qsizetype sectionsSize = sections.size();
5170
5171 if (!(flags & QString::SectionSkipEmpty)) {
5172 if (start < 0)
5173 start += sectionsSize;
5174 if (end < 0)
5175 end += sectionsSize;
5176 } else {
5177 qsizetype skip = 0;
5178 for (qsizetype k = 0; k < sectionsSize; ++k) {
5179 const qt_section_chunk &section = sections[k];
5180 if (section.length == section.string.size())
5181 skip++;
5182 }
5183 if (start < 0)
5184 start += sectionsSize - skip;
5185 if (end < 0)
5186 end += sectionsSize - skip;
5187 }
5188 if (start >= sectionsSize || end < 0 || start > end)
5189 return QString();
5190
5191 QString ret;
5192 qsizetype x = 0;
5193 qsizetype first_i = start, last_i = end;
5194 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5195 const qt_section_chunk &section = sections[i];
5196 const bool empty = (section.length == section.string.size());
5197 if (x >= start) {
5198 if (x == start)
5199 first_i = i;
5200 if (x == end)
5201 last_i = i;
5202 if (x != start)
5203 ret += section.string;
5204 else
5205 ret += section.string.mid(pos: section.length);
5206 }
5207 if (!empty || !(flags & QString::SectionSkipEmpty))
5208 x++;
5209 }
5210
5211 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5212 const qt_section_chunk &section = sections[first_i];
5213 ret.prepend(v: section.string.left(n: section.length));
5214 }
5215
5216 if ((flags & QString::SectionIncludeTrailingSep)
5217 && last_i < sectionsSize - 1) {
5218 const qt_section_chunk &section = sections[last_i + 1];
5219 ret += section.string.left(n: section.length);
5220 }
5221
5222 return ret;
5223}
5224
5225/*!
5226 \overload section()
5227 \since 5.0
5228
5229 This string is treated as a sequence of fields separated by the
5230 regular expression, \a re.
5231
5232 \snippet qstring/main.cpp 89
5233
5234 \warning Using this QRegularExpression version is much more expensive than
5235 the overloaded string and character versions.
5236
5237 \sa split(), simplified()
5238*/
5239QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5240{
5241 if (!re.isValid()) {
5242 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::section");
5243 return QString();
5244 }
5245
5246 const QChar *uc = unicode();
5247 if (!uc)
5248 return QString();
5249
5250 QRegularExpression sep(re);
5251 if (flags & SectionCaseInsensitiveSeps)
5252 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5253
5254 QVarLengthArray<qt_section_chunk> sections;
5255 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5256 QRegularExpressionMatchIterator iterator = sep.globalMatch(subject: *this);
5257 while (iterator.hasNext()) {
5258 QRegularExpressionMatch match = iterator.next();
5259 m = match.capturedStart();
5260 sections.append(t: qt_section_chunk(last_len, QStringView{ *this }.sliced(pos: last_m, n: m - last_m)));
5261 last_m = m;
5262 last_len = match.capturedLength();
5263 }
5264 sections.append(t: qt_section_chunk(last_len, QStringView{ *this }.sliced(pos: last_m, n: n - last_m)));
5265
5266 return extractSections(sections, start, end, flags);
5267}
5268#endif // QT_CONFIG(regularexpression)
5269
5270/*!
5271 \fn QString QString::left(qsizetype n) const &
5272 \fn QString QString::left(qsizetype n) &&
5273
5274 Returns a substring that contains the \a n leftmost characters
5275 of the string.
5276
5277 If you know that \a n cannot be out of bounds, use first() instead in new
5278 code, because it is faster.
5279
5280 The entire string is returned if \a n is greater than or equal
5281 to size(), or less than zero.
5282
5283 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5284*/
5285
5286/*!
5287 \fn QString QString::right(qsizetype n) const &
5288 \fn QString QString::right(qsizetype n) &&
5289
5290 Returns a substring that contains the \a n rightmost characters
5291 of the string.
5292
5293 If you know that \a n cannot be out of bounds, use last() instead in new
5294 code, because it is faster.
5295
5296 The entire string is returned if \a n is greater than or equal
5297 to size(), or less than zero.
5298
5299 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5300*/
5301
5302/*!
5303 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5304 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5305
5306 Returns a string that contains \a n characters of this string,
5307 starting at the specified \a position index.
5308
5309 If you know that \a position and \a n cannot be out of bounds, use sliced()
5310 instead in new code, because it is faster.
5311
5312 Returns a null string if the \a position index exceeds the
5313 length of the string. If there are fewer than \a n characters
5314 available in the string starting at the given \a position, or if
5315 \a n is -1 (default), the function returns all characters that
5316 are available from the specified \a position.
5317
5318 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5319*/
5320QString QString::mid(qsizetype position, qsizetype n) const &
5321{
5322 qsizetype p = position;
5323 qsizetype l = n;
5324 using namespace QtPrivate;
5325 switch (QContainerImplHelper::mid(originalLength: size(), position: &p, length: &l)) {
5326 case QContainerImplHelper::Null:
5327 return QString();
5328 case QContainerImplHelper::Empty:
5329 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5330 case QContainerImplHelper::Full:
5331 return *this;
5332 case QContainerImplHelper::Subset:
5333 return sliced(pos: p, n: l);
5334 }
5335 Q_UNREACHABLE_RETURN(QString());
5336}
5337
5338QString QString::mid(qsizetype position, qsizetype n) &&
5339{
5340 qsizetype p = position;
5341 qsizetype l = n;
5342 using namespace QtPrivate;
5343 switch (QContainerImplHelper::mid(originalLength: size(), position: &p, length: &l)) {
5344 case QContainerImplHelper::Null:
5345 return QString();
5346 case QContainerImplHelper::Empty:
5347 resize(size: 0); // keep capacity if we've reserve()d
5348 [[fallthrough]];
5349 case QContainerImplHelper::Full:
5350 return std::move(*this);
5351 case QContainerImplHelper::Subset:
5352 return std::move(*this).sliced(pos: p, n: l);
5353 }
5354 Q_UNREACHABLE_RETURN(QString());
5355}
5356
5357/*!
5358 \fn QString QString::first(qsizetype n) const &
5359 \fn QString QString::first(qsizetype n) &&
5360 \since 6.0
5361
5362 Returns a string that contains the first \a n characters
5363 of this string.
5364
5365 \note The behavior is undefined when \a n < 0 or \a n > size().
5366
5367 \snippet qstring/main.cpp 31
5368
5369 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5370*/
5371
5372/*!
5373 \fn QString QString::last(qsizetype n) const &
5374 \fn QString QString::last(qsizetype n) &&
5375 \since 6.0
5376
5377 Returns the string that contains the last \a n characters of this string.
5378
5379 \note The behavior is undefined when \a n < 0 or \a n > size().
5380
5381 \snippet qstring/main.cpp 48
5382
5383 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5384*/
5385
5386/*!
5387 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5388 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5389 \since 6.0
5390
5391 Returns a string that contains \a n characters of this string,
5392 starting at position \a pos.
5393
5394 \note The behavior is undefined when \a pos < 0, \a n < 0,
5395 or \a pos + \a n > size().
5396
5397 \snippet qstring/main.cpp 34
5398
5399 \sa first(), last(), chopped(), chop(), truncate(), slice()
5400*/
5401QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5402{
5403 if (n == 0)
5404 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5405 DataPointer d = std::move(str.d).sliced(pos, n);
5406 d.data()[n] = 0;
5407 return QString(std::move(d));
5408}
5409
5410/*!
5411 \fn QString QString::sliced(qsizetype pos) const &
5412 \fn QString QString::sliced(qsizetype pos) &&
5413 \since 6.0
5414 \overload
5415
5416 Returns a string that contains the portion of this string starting at
5417 position \a pos and extending to its end.
5418
5419 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5420
5421 \sa first(), last(), chopped(), chop(), truncate(), slice()
5422*/
5423
5424/*!
5425 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5426 \since 6.8
5427
5428 Modifies this string to start at position \a pos, extending for \a n
5429 characters (UTF-16 code units), and returns a reference to this string.
5430
5431 \note The behavior is undefined if \a pos < 0, \a n < 0,
5432 or \a pos + \a n > size().
5433
5434 \snippet qstring/main.cpp 97
5435
5436 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5437*/
5438
5439/*!
5440 \fn QString &QString::slice(qsizetype pos)
5441 \since 6.8
5442 \overload
5443
5444 Modifies this string to start at position \a pos and extending to its end,
5445 and returns a reference to this string.
5446
5447 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5448
5449 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5450*/
5451
5452/*!
5453 \fn QString QString::chopped(qsizetype len) const &
5454 \fn QString QString::chopped(qsizetype len) &&
5455 \since 5.10
5456
5457 Returns a string that contains the size() - \a len leftmost characters
5458 of this string.
5459
5460 \note The behavior is undefined if \a len is negative or greater than size().
5461
5462 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5463*/
5464
5465/*!
5466 Returns \c true if the string starts with \a s; otherwise returns
5467 \c false.
5468
5469 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5470
5471 \snippet qstring/main.cpp 65
5472
5473 \sa endsWith()
5474*/
5475bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5476{
5477 return qt_starts_with_impl(haystack: QStringView(*this), needle: QStringView(s), cs);
5478}
5479
5480/*!
5481 \overload startsWith()
5482 */
5483bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5484{
5485 return qt_starts_with_impl(haystack: QStringView(*this), needle: s, cs);
5486}
5487
5488/*!
5489 \overload startsWith()
5490
5491 Returns \c true if the string starts with \a c; otherwise returns
5492 \c false.
5493*/
5494bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5495{
5496 if (!size())
5497 return false;
5498 if (cs == Qt::CaseSensitive)
5499 return at(i: 0) == c;
5500 return foldCase(ch: at(i: 0)) == foldCase(ch: c);
5501}
5502
5503/*!
5504 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5505 \since 5.10
5506 \overload
5507
5508 Returns \c true if the string starts with the string view \a str;
5509 otherwise returns \c false.
5510
5511 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5512
5513 \sa endsWith()
5514*/
5515
5516/*!
5517 Returns \c true if the string ends with \a s; otherwise returns
5518 \c false.
5519
5520 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5521
5522 \snippet qstring/main.cpp 20
5523
5524 \sa startsWith()
5525*/
5526bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5527{
5528 return qt_ends_with_impl(haystack: QStringView(*this), needle: QStringView(s), cs);
5529}
5530
5531/*!
5532 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5533 \since 5.10
5534 \overload endsWith()
5535 Returns \c true if the string ends with the string view \a str;
5536 otherwise returns \c false.
5537
5538 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5539
5540 \sa startsWith()
5541*/
5542
5543/*!
5544 \overload endsWith()
5545*/
5546bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5547{
5548 return qt_ends_with_impl(haystack: QStringView(*this), needle: s, cs);
5549}
5550
5551/*!
5552 Returns \c true if the string ends with \a c; otherwise returns
5553 \c false.
5554
5555 \overload endsWith()
5556 */
5557bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5558{
5559 if (!size())
5560 return false;
5561 if (cs == Qt::CaseSensitive)
5562 return at(i: size() - 1) == c;
5563 return foldCase(ch: at(i: size() - 1)) == foldCase(ch: c);
5564}
5565
5566static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5567{
5568 QStringIterator it(s);
5569 while (it.hasNext()) {
5570 const char32_t uc = it.next();
5571 if (qGetProp(ucs4: uc)->cases[c].diff)
5572 return false;
5573 }
5574 return true;
5575}
5576
5577bool QtPrivate::isLower(QStringView s) noexcept
5578{
5579 return checkCase(s, c: QUnicodeTables::LowerCase);
5580}
5581
5582bool QtPrivate::isUpper(QStringView s) noexcept
5583{
5584 return checkCase(s, c: QUnicodeTables::UpperCase);
5585}
5586
5587/*!
5588 Returns \c true if the string is uppercase, that is, it's identical
5589 to its toUpper() folding.
5590
5591 Note that this does \e not mean that the string does not contain
5592 lowercase letters (some lowercase letters do not have an uppercase
5593 folding; they are left unchanged by toUpper()).
5594 For more information, refer to the Unicode standard, section 3.13.
5595
5596 \since 5.12
5597
5598 \sa QChar::toUpper(), isLower()
5599*/
5600bool QString::isUpper() const
5601{
5602 return QtPrivate::isUpper(s: qToStringViewIgnoringNull(s: *this));
5603}
5604
5605/*!
5606 Returns \c true if the string is lowercase, that is, it's identical
5607 to its toLower() folding.
5608
5609 Note that this does \e not mean that the string does not contain
5610 uppercase letters (some uppercase letters do not have a lowercase
5611 folding; they are left unchanged by toLower()).
5612 For more information, refer to the Unicode standard, section 3.13.
5613
5614 \since 5.12
5615
5616 \sa QChar::toLower(), isUpper()
5617 */
5618bool QString::isLower() const
5619{
5620 return QtPrivate::isLower(s: qToStringViewIgnoringNull(s: *this));
5621}
5622
5623static QByteArray qt_convert_to_latin1(QStringView string);
5624
5625QByteArray QString::toLatin1_helper(const QString &string)
5626{
5627 return qt_convert_to_latin1(string);
5628}
5629
5630/*!
5631 \since 6.0
5632 \internal
5633 \relates QAnyStringView
5634
5635 Returns a UTF-16 representation of \a string as a QString.
5636
5637 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5638 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5639*/
5640QString QtPrivate::convertToQString(QAnyStringView string)
5641{
5642 return string.visit(v: [] (auto string) { return string.toString(); });
5643}
5644
5645/*!
5646 \since 5.10
5647 \internal
5648 \relates QStringView
5649
5650 Returns a Latin-1 representation of \a string as a QByteArray.
5651
5652 The behavior is undefined if \a string contains non-Latin1 characters.
5653
5654 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5655 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5656*/
5657QByteArray QtPrivate::convertToLatin1(QStringView string)
5658{
5659 return qt_convert_to_latin1(string);
5660}
5661
5662Q_NEVER_INLINE
5663static QByteArray qt_convert_to_latin1(QStringView string)
5664{
5665 if (Q_UNLIKELY(string.isNull()))
5666 return QByteArray();
5667
5668 QByteArray ba(string.size(), Qt::Uninitialized);
5669
5670 // since we own the only copy, we're going to const_cast the constData;
5671 // that avoids an unnecessary call to detach() and expansion code that will never get used
5672 qt_to_latin1(dst: reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5673 src: string.utf16(), length: string.size());
5674 return ba;
5675}
5676
5677QByteArray QString::toLatin1_helper_inplace(QString &s)
5678{
5679 if (!s.isDetached())
5680 return qt_convert_to_latin1(string: s);
5681
5682 // We can return our own buffer to the caller.
5683 // Conversion to Latin-1 always shrinks the buffer by half.
5684 // This relies on the fact that we use QArrayData for everything behind the scenes
5685
5686 // First, do the in-place conversion. Since isDetached() == true, the data
5687 // was allocated by QArrayData, so the null terminator must be there.
5688 qsizetype length = s.size();
5689 char16_t *sdata = s.d->data();
5690 Q_ASSERT(sdata[length] == u'\0');
5691 qt_to_latin1(dst: reinterpret_cast<uchar *>(sdata), src: sdata, length: length + 1);
5692
5693 // Move the internals over to the byte array.
5694 // Kids, avert your eyes. Don't try this at home.
5695 auto ba_d = std::move(s.d).reinterpreted<char>();
5696
5697 // Some sanity checks
5698 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5699 Q_ASSERT(s.isNull());
5700 Q_ASSERT(s.isEmpty());
5701 Q_ASSERT(s.constData() == QString().constData());
5702
5703 return QByteArray(std::move(ba_d));
5704}
5705
5706// QLatin1 methods that use helpers from qstring.cpp
5707char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5708{
5709 const qsizetype len = in.size();
5710 qt_from_latin1(dst: out, str: in.data(), size: len);
5711 return std::next(x: out, n: len);
5712}
5713
5714char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5715{
5716 const qsizetype len = in.size();
5717 qt_to_latin1(dst: reinterpret_cast<uchar *>(out), src: in.utf16(), length: len);
5718 return out + len;
5719}
5720
5721/*!
5722 \fn QByteArray QString::toLatin1() const
5723
5724 Returns a Latin-1 representation of the string as a QByteArray.
5725
5726 The returned byte array is undefined if the string contains non-Latin1
5727 characters. Those characters may be suppressed or replaced with a
5728 question mark.
5729
5730 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5731*/
5732
// Forward declaration: QString::toLocal8Bit_helper() below calls this helper
// before its definition appears.
static QByteArray qt_convert_to_local_8bit(QStringView string);
5734
5735/*!
5736 \fn QByteArray QString::toLocal8Bit() const
5737
5738 Returns the local 8-bit representation of the string as a
5739 QByteArray.
5740
5741 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5742
5743 If this string contains any characters that cannot be encoded in the
5744 local 8-bit encoding, the returned byte array is undefined. Those
5745 characters may be suppressed or replaced by another.
5746
5747 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5748*/
5749
5750QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5751{
5752 return qt_convert_to_local_8bit(string: QStringView(data, size));
5753}
5754
5755static QByteArray qt_convert_to_local_8bit(QStringView string)
5756{
5757 if (string.isNull())
5758 return QByteArray();
5759 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5760 return fromUtf16(string);
5761}
5762
5763/*!
5764 \since 5.10
5765 \internal
5766 \relates QStringView
5767
5768 Returns a local 8-bit representation of \a string as a QByteArray.
5769
    On Unix systems this is equivalent to toUtf8(); on Windows the system's
    current code page is used.
5772
5773 The behavior is undefined if \a string contains characters not
5774 supported by the locale's 8-bit encoding.
5775
5776 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5777*/
5778QByteArray QtPrivate::convertToLocal8Bit(QStringView string)
5779{
5780 return qt_convert_to_local_8bit(string);
5781}
5782
// Forward declaration: QString::toUtf8_helper() below calls this helper
// before its definition appears.
static QByteArray qt_convert_to_utf8(QStringView str);
5784
5785/*!
5786 \fn QByteArray QString::toUtf8() const
5787
5788 Returns a UTF-8 representation of the string as a QByteArray.
5789
5790 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5791 string like QString.
5792
5793 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5794*/
5795
5796QByteArray QString::toUtf8_helper(const QString &str)
5797{
5798 return qt_convert_to_utf8(str);
5799}
5800
5801static QByteArray qt_convert_to_utf8(QStringView str)
5802{
5803 if (str.isNull())
5804 return QByteArray();
5805
5806 return QUtf8::convertFromUnicode(in: str);
5807}
5808
5809/*!
5810 \since 5.10
5811 \internal
5812 \relates QStringView
5813
5814 Returns a UTF-8 representation of \a string as a QByteArray.
5815
5816 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5817 string like QStringView.
5818
5819 \sa QString::toUtf8(), QStringView::toUtf8()
5820*/
5821QByteArray QtPrivate::convertToUtf8(QStringView string)
5822{
5823 return qt_convert_to_utf8(str: string);
5824}
5825
// Forward declaration: QString::toUcs4() below calls this helper before its
// definition appears.
static QList<uint> qt_convert_to_ucs4(QStringView string);
5827
5828/*!
5829 \since 4.2
5830
5831 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5832
5833 UCS-4 is a Unicode codec and therefore it is lossless. All characters from
5834 this string will be encoded in UCS-4. Any invalid sequence of code units in
5835 this string is replaced by the Unicode's replacement character
5836 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5837
    The returned list is not '\\0'-terminated.
5839
5840 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5841 fromUcs4(), toWCharArray()
5842*/
5843QList<uint> QString::toUcs4() const
5844{
5845 return qt_convert_to_ucs4(string: *this);
5846}
5847
5848static QList<uint> qt_convert_to_ucs4(QStringView string)
5849{
5850 QList<uint> v(string.size());
5851 uint *a = const_cast<uint*>(v.constData());
5852 QStringIterator it(string);
5853 while (it.hasNext())
5854 *a++ = it.next();
5855 v.resize(size: a - v.constData());
5856 return v;
5857}
5858
5859/*!
5860 \since 5.10
5861 \internal
5862 \relates QStringView
5863
5864 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5865
5866 UCS-4 is a Unicode codec and therefore it is lossless. All characters from
5867 this string will be encoded in UCS-4. Any invalid sequence of code units in
5868 this string is replaced by the Unicode's replacement character
5869 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5870
    The returned list is not '\\0'-terminated.
5872
5873 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5874 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5875*/
5876QList<uint> QtPrivate::convertToUcs4(QStringView string)
5877{
5878 return qt_convert_to_ucs4(string);
5879}
5880
5881/*!
5882 \fn QString QString::fromLatin1(QByteArrayView str)
5883 \overload
5884 \since 6.0
5885
5886 Returns a QString initialized with the Latin-1 string \a str.
5887
5888 \note: any null ('\\0') bytes in the byte array will be included in this
5889 string, converted to Unicode null characters (U+0000).
5890*/
5891QString QString::fromLatin1(QByteArrayView ba)
5892{
5893 DataPointer d;
5894 if (!ba.data()) {
5895 // nothing to do
5896 } else if (ba.size() == 0) {
5897 d = DataPointer::fromRawData(rawData: &_empty, length: 0);
5898 } else {
5899 d = DataPointer(ba.size(), ba.size());
5900 Q_CHECK_PTR(d.data());
5901 d.data()[ba.size()] = '\0';
5902 char16_t *dst = d.data();
5903
5904 qt_from_latin1(dst, str: ba.data(), size: size_t(ba.size()));
5905 }
5906 return QString(std::move(d));
5907}
5908
5909/*!
5910 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5911 Returns a QString initialized with the first \a size characters
5912 of the Latin-1 string \a str.
5913
5914 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5915
5916 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5917*/
5918
5919/*!
5920 \fn QString QString::fromLatin1(const QByteArray &str)
5921 \overload
5922 \since 5.0
5923
5924 Returns a QString initialized with the Latin-1 string \a str.
5925
5926 \note: any null ('\\0') bytes in the byte array will be included in this
5927 string, converted to Unicode null characters (U+0000). This behavior is
5928 different from Qt 5.x.
5929*/
5930
5931/*!
5932 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5933 Returns a QString initialized with the first \a size characters
5934 of the 8-bit string \a str.
5935
5936 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5937
5938 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5939
5940 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5941*/
5942
5943/*!
5944 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5945 \overload
5946 \since 5.0
5947
5948 Returns a QString initialized with the 8-bit string \a str.
5949
5950 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5951
5952 \note: any null ('\\0') bytes in the byte array will be included in this
5953 string, converted to Unicode null characters (U+0000). This behavior is
5954 different from Qt 5.x.
5955*/
5956
5957/*!
5958 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5959 \overload
5960 \since 6.0
5961
5962 Returns a QString initialized with the 8-bit string \a str.
5963
5964 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5965
5966 \note: any null ('\\0') bytes in the byte array will be included in this
5967 string, converted to Unicode null characters (U+0000).
5968*/
5969QString QString::fromLocal8Bit(QByteArrayView ba)
5970{
5971 if (ba.isNull())
5972 return QString();
5973 if (ba.isEmpty())
5974 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
5975 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5976 return toUtf16(ba);
5977}
5978
5979/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5980 Returns a QString initialized with the first \a size bytes
5981 of the UTF-8 string \a str.
5982
5983 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5984
5985 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5986 string like QString. However, invalid sequences are possible with UTF-8
5987 and, if any such are found, they will be replaced with one or more
5988 "replacement characters", or suppressed. These include non-Unicode
5989 sequences, non-characters, overlong sequences or surrogate codepoints
5990 encoded into UTF-8.
5991
5992 This function can be used to process incoming data incrementally as long as
5993 all UTF-8 characters are terminated within the incoming data. Any
5994 unterminated characters at the end of the string will be replaced or
5995 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5996
5997 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5998*/
5999
6000/*!
6001 \fn QString QString::fromUtf8(const char8_t *str)
6002 \overload
6003 \since 6.1
6004
6005 This overload is only available when compiling in C++20 mode.
6006*/
6007
6008/*!
6009 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
6010 \overload
6011 \since 6.0
6012
6013 This overload is only available when compiling in C++20 mode.
6014*/
6015
6016/*!
6017 \fn QString QString::fromUtf8(const QByteArray &str)
6018 \overload
6019 \since 5.0
6020
6021 Returns a QString initialized with the UTF-8 string \a str.
6022
6023 \note: any null ('\\0') bytes in the byte array will be included in this
6024 string, converted to Unicode null characters (U+0000). This behavior is
6025 different from Qt 5.x.
6026*/
6027
6028/*!
6029 \fn QString QString::fromUtf8(QByteArrayView str)
6030 \overload
6031 \since 6.0
6032
6033 Returns a QString initialized with the UTF-8 string \a str.
6034
6035 \note: any null ('\\0') bytes in the byte array will be included in this
6036 string, converted to Unicode null characters (U+0000).
6037*/
6038QString QString::fromUtf8(QByteArrayView ba)
6039{
6040 if (ba.isNull())
6041 return QString();
6042 if (ba.isEmpty())
6043 return QString(DataPointer::fromRawData(rawData: &_empty, length: 0));
6044 return QUtf8::convertToUnicode(in: ba);
6045}
6046
6047#ifndef QT_BOOTSTRAPPED
6048/*!
6049 \since 5.3
6050 Returns a QString initialized with the first \a size characters
6051 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6052
    If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6054
6055 This function checks for a Byte Order Mark (BOM). If it is missing,
6056 host byte order is assumed.
6057
6058 This function is slow compared to the other Unicode conversions.
6059 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6060
6061 QString makes a deep copy of the Unicode data.
6062
6063 \sa utf16(), setUtf16(), fromStdU16String()
6064*/
6065QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6066{
6067 if (!unicode)
6068 return QString();
6069 if (size < 0)
6070 size = QtPrivate::qustrlen(str: unicode);
6071 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6072 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6073}
6074
6075/*!
6076 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6077 \deprecated [6.0] Use the \c char16_t overload instead.
6078*/
6079
6080/*!
6081 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6082 \since 4.2
6083 \deprecated [6.0] Use the \c char32_t overload instead.
6084*/
6085
6086/*!
6087 \since 5.3
6088
6089 Returns a QString initialized with the first \a size characters
6090 of the Unicode string \a unicode (ISO-10646-UCS-4 encoded).
6091
    If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6093
6094 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6095 fromStdU32String()
6096*/
6097QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6098{
6099 if (!unicode)
6100 return QString();
6101 if (size < 0) {
6102 size = 0;
6103 while (unicode[size] != 0)
6104 ++size;
6105 }
6106 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6107 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6108}
6109#endif // !QT_BOOTSTRAPPED
6110
6111/*!
6112 Resizes the string to \a size characters and copies \a unicode
6113 into the string.
6114
6115 If \a unicode is \nullptr, nothing is copied, but the string is still
6116 resized to \a size.
6117
6118 \sa unicode(), setUtf16()
6119*/
6120QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6121{
6122 resize(size);
6123 if (unicode && size)
6124 memcpy(dest: d.data(), src: unicode, n: size * sizeof(QChar));
6125 return *this;
6126}
6127
6128/*!
6129 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6130
6131 Resizes the string to \a size characters and copies \a unicode
6132 into the string.
6133
6134 If \a unicode is \nullptr, nothing is copied, but the string is still
6135 resized to \a size.
6136
6137 Note that unlike fromUtf16(), this function does not consider BOMs and
6138 possibly differing byte ordering.
6139
6140 \sa utf16(), setUnicode()
6141*/
6142
6143/*!
6144 \fn QString QString::simplified() const
6145
6146 Returns a string that has whitespace removed from the start
6147 and the end, and that has each sequence of internal whitespace
6148 replaced with a single space.
6149
6150 Whitespace means any character for which QChar::isSpace() returns
6151 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6152 '\\f', '\\r', and ' '.
6153
6154 Example:
6155
6156 \snippet qstring/main.cpp 57
6157
6158 \sa trimmed()
6159*/
6160QString QString::simplified_helper(const QString &str)
6161{
6162 return QStringAlgorithms<const QString>::simplified_helper(str);
6163}
6164
6165QString QString::simplified_helper(QString &str)
6166{
6167 return QStringAlgorithms<QString>::simplified_helper(str);
6168}
6169
6170namespace {
6171 template <typename StringView>
6172 StringView qt_trimmed(StringView s) noexcept
6173 {
6174 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6175 return StringView{begin, end};
6176 }
6177}
6178
6179/*!
6180 \fn QStringView QtPrivate::trimmed(QStringView s)
6181 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6182 \internal
6183 \relates QStringView
6184 \since 5.10
6185
6186 Returns \a s with whitespace removed from the start and the end.
6187
6188 Whitespace means any character for which QChar::isSpace() returns
6189 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6190 '\\f', '\\r', and ' '.
6191
6192 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6193*/
6194QStringView QtPrivate::trimmed(QStringView s) noexcept
6195{
6196 return qt_trimmed(s);
6197}
6198
6199QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6200{
6201 return qt_trimmed(s);
6202}
6203
6204/*!
6205 \fn QString QString::trimmed() const
6206
6207 Returns a string that has whitespace removed from the start and
6208 the end.
6209
6210 Whitespace means any character for which QChar::isSpace() returns
6211 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6212 '\\f', '\\r', and ' '.
6213
6214 Example:
6215
6216 \snippet qstring/main.cpp 82
6217
6218 Unlike simplified(), trimmed() leaves internal whitespace alone.
6219
6220 \sa simplified()
6221*/
6222QString QString::trimmed_helper(const QString &str)
6223{
6224 return QStringAlgorithms<const QString>::trimmed_helper(str);
6225}
6226
6227QString QString::trimmed_helper(QString &str)
6228{
6229 return QStringAlgorithms<QString>::trimmed_helper(str);
6230}
6231
6232/*! \fn const QChar QString::at(qsizetype position) const
6233
6234 Returns the character at the given index \a position in the
6235 string.
6236
6237 The \a position must be a valid index position in the string
6238 (i.e., 0 <= \a position < size()).
6239
6240 \sa operator[]()
6241*/
6242
6243/*!
6244 \fn QChar &QString::operator[](qsizetype position)
6245
6246 Returns the character at the specified \a position in the string as a
6247 modifiable reference.
6248
6249 Example:
6250
6251 \snippet qstring/main.cpp 85
6252
6253 \sa at()
6254*/
6255
6256/*!
6257 \fn const QChar QString::operator[](qsizetype position) const
6258
6259 \overload operator[]()
6260*/
6261
6262/*!
6263 \fn QChar QString::front() const
6264 \since 5.10
6265
6266 Returns the first character in the string.
6267 Same as \c{at(0)}.
6268
6269 This function is provided for STL compatibility.
6270
6271 \warning Calling this function on an empty string constitutes
6272 undefined behavior.
6273
6274 \sa back(), at(), operator[]()
6275*/
6276
6277/*!
6278 \fn QChar QString::back() const
6279 \since 5.10
6280
6281 Returns the last character in the string.
6282 Same as \c{at(size() - 1)}.
6283
6284 This function is provided for STL compatibility.
6285
6286 \warning Calling this function on an empty string constitutes
6287 undefined behavior.
6288
6289 \sa front(), at(), operator[]()
6290*/
6291
6292/*!
6293 \fn QChar &QString::front()
6294 \since 5.10
6295
6296 Returns a reference to the first character in the string.
6297 Same as \c{operator[](0)}.
6298
6299 This function is provided for STL compatibility.
6300
6301 \warning Calling this function on an empty string constitutes
6302 undefined behavior.
6303
6304 \sa back(), at(), operator[]()
6305*/
6306
6307/*!
6308 \fn QChar &QString::back()
6309 \since 5.10
6310
6311 Returns a reference to the last character in the string.
6312 Same as \c{operator[](size() - 1)}.
6313
6314 This function is provided for STL compatibility.
6315
6316 \warning Calling this function on an empty string constitutes
6317 undefined behavior.
6318
6319 \sa front(), at(), operator[]()
6320*/
6321
6322/*!
6323 \fn void QString::truncate(qsizetype position)
6324
6325 Truncates the string at the given \a position index.
6326
6327 If the specified \a position index is beyond the end of the
6328 string, nothing happens.
6329
6330 Example:
6331
6332 \snippet qstring/main.cpp 83
6333
6334 If \a position is negative, it is equivalent to passing zero.
6335
6336 \sa chop(), resize(), first(), QStringView::truncate()
6337*/
6338
6339void QString::truncate(qsizetype pos)
6340{
6341 if (pos < size())
6342 resize(size: pos);
6343}
6344
6345
6346/*!
6347 Removes \a n characters from the end of the string.
6348
6349 If \a n is greater than or equal to size(), the result is an
6350 empty string; if \a n is negative, it is equivalent to passing zero.
6351
6352 Example:
6353 \snippet qstring/main.cpp 15
6354
6355 If you want to remove characters from the \e beginning of the
6356 string, use remove() instead.
6357
6358 \sa truncate(), resize(), remove(), QStringView::chop()
6359*/
6360void QString::chop(qsizetype n)
6361{
6362 if (n > 0)
6363 resize(size: d.size - n);
6364}
6365
6366/*!
6367 Sets every character in the string to character \a ch. If \a size
6368 is different from -1 (default), the string is resized to \a
6369 size beforehand.
6370
6371 Example:
6372
6373 \snippet qstring/main.cpp 21
6374
6375 \sa resize()
6376*/
6377
6378QString& QString::fill(QChar ch, qsizetype size)
6379{
6380 resize(size: size < 0 ? d.size : size);
6381 if (d.size)
6382 std::fill(first: d.data(), last: d.data() + d.size, value: ch.unicode());
6383 return *this;
6384}
6385
6386/*!
6387 \fn qsizetype QString::length() const
6388
6389 Returns the number of characters in this string. Equivalent to
6390 size().
6391
6392 \sa resize()
6393*/
6394
6395/*!
6396 \fn qsizetype QString::size() const
6397
6398 Returns the number of characters in this string.
6399
6400 The last character in the string is at position size() - 1.
6401
6402 Example:
6403 \snippet qstring/main.cpp 58
6404
6405 \sa isEmpty(), resize()
6406*/
6407
6408/*!
6409 \fn qsizetype QString::max_size() const
6410 \fn qsizetype QString::maxSize()
6411 \since 6.8
6412
6413 It returns the maximum number of elements that the string can
6414 theoretically hold. In practice, the number can be much smaller,
6415 limited by the amount of memory available to the system.
6416*/
6417
6418/*! \fn bool QString::isNull() const
6419
6420 Returns \c true if this string is null; otherwise returns \c false.
6421
6422 Example:
6423
6424 \snippet qstring/main.cpp 28
6425
6426 Qt makes a distinction between null strings and empty strings for
6427 historical reasons. For most applications, what matters is
6428 whether or not a string contains any data, and this can be
6429 determined using the isEmpty() function.
6430
6431 \sa isEmpty()
6432*/
6433
6434/*! \fn bool QString::isEmpty() const
6435
6436 Returns \c true if the string has no characters; otherwise returns
6437 \c false.
6438
6439 Example:
6440
6441 \snippet qstring/main.cpp 27
6442
6443 \sa size()
6444*/
6445
6446/*! \fn QString &QString::operator+=(const QString &other)
6447
6448 Appends the string \a other onto the end of this string and
6449 returns a reference to this string.
6450
6451 Example:
6452
6453 \snippet qstring/main.cpp 84
6454
6455 This operation is typically very fast (\l{constant time}),
6456 because QString preallocates extra space at the end of the string
6457 data so it can grow without reallocating the entire string each
6458 time.
6459
6460 \sa append(), prepend()
6461*/
6462
6463/*! \fn QString &QString::operator+=(QLatin1StringView str)
6464
6465 \overload operator+=()
6466
6467 Appends the Latin-1 string viewed by \a str to this string.
6468*/
6469
6470/*! \fn QString &QString::operator+=(QUtf8StringView str)
6471 \since 6.5
6472 \overload operator+=()
6473
6474 Appends the UTF-8 string view \a str to this string.
6475*/
6476
6477/*! \fn QString &QString::operator+=(const QByteArray &ba)
6478
6479 \overload operator+=()
6480
6481 Appends the byte array \a ba to this string. The byte array is converted
6482 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6483 are embedded in the \a ba byte array, they will be included in the
6484 transformation.
6485
6486 You can disable this function by defining
6487 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6488 can be useful if you want to ensure that all user-visible strings
6489 go through QObject::tr(), for example.
6490*/
6491
6492/*! \fn QString &QString::operator+=(const char *str)
6493
6494 \overload operator+=()
6495
6496 Appends the string \a str to this string. The const char pointer
6497 is converted to Unicode using the fromUtf8() function.
6498
6499 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6500 when you compile your applications. This can be useful if you want
6501 to ensure that all user-visible strings go through QObject::tr(),
6502 for example.
6503*/
6504
6505/*! \fn QString &QString::operator+=(QStringView str)
6506 \since 6.0
6507 \overload operator+=()
6508
6509 Appends the string view \a str to this string.
6510*/
6511
6512/*! \fn QString &QString::operator+=(QChar ch)
6513
6514 \overload operator+=()
6515
6516 Appends the character \a ch to the string.
6517*/
6518
6519/*!
6520 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6521
6522 \overload operator==()
6523
6524 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6525 Note that no string is equal to \a lhs being 0.
6526
6527 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6528*/
6529
6530/*!
6531 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6532
6533 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6534 \c false.
6535
6536 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6537 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6538*/
6539
6540/*!
6541 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6542
6543 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6544 returns \c false. For \a lhs != 0, this is equivalent to \c
6545 {compare(lhs, rhs) < 0}.
6546
6547 \sa {Comparing Strings}
6548*/
6549
6550/*!
6551 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6552
6553 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6554 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6555 {compare(lhs, rhs) <= 0}.
6556
6557 \sa {Comparing Strings}
6558*/
6559
6560/*!
6561 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6562
6563 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6564 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6565
6566 \sa {Comparing Strings}
6567*/
6568
6569/*!
6570 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6571
6572 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6573 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6574 {compare(lhs, rhs) >= 0}.
6575
6576 \sa {Comparing Strings}
6577*/
6578
6579/*!
6580 \fn QString operator+(const QString &s1, const QString &s2)
6581 \fn QString operator+(QString &&s1, const QString &s2)
6582 \relates QString
6583
6584 Returns a string which is the result of concatenating \a s1 and \a
6585 s2.
6586*/
6587
6588/*!
6589 \fn QString operator+(const QString &s1, const char *s2)
6590 \relates QString
6591
6592 Returns a string which is the result of concatenating \a s1 and \a
6593 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6594 function).
6595
6596 \sa QString::fromUtf8()
6597*/
6598
6599/*!
6600 \fn QString operator+(const char *s1, const QString &s2)
6601 \relates QString
6602
6603 Returns a string which is the result of concatenating \a s1 and \a
6604 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6605 function).
6606
6607 \sa QString::fromUtf8()
6608*/
6609
6610/*!
6611 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6612 \since 4.2
6613
6614 Compares the string \a s1 with the string \a s2 and returns a negative integer
6615 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6616 and zero if they are equal.
6617
6618 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6619
6620 Case sensitive comparison is based exclusively on the numeric
6621 Unicode values of the characters and is very fast, but is not what
6622 a human would expect. Consider sorting user-visible strings with
6623 localeAwareCompare().
6624
6625 \snippet qstring/main.cpp 16
6626
6627//! [compare-isNull-vs-isEmpty]
6628 \note This function treats null strings the same as empty strings,
6629 for more details see \l {Distinction Between Null and Empty Strings}.
6630//! [compare-isNull-vs-isEmpty]
6631
6632 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6633*/
6634
6635/*!
6636 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6637 \since 4.2
6638 \overload compare()
6639
6640 Performs a comparison of \a s1 and \a s2, using the case
6641 sensitivity setting \a cs.
6642*/
6643
6644/*!
6645 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6646
6647 \since 4.2
6648 \overload compare()
6649
6650 Performs a comparison of \a s1 and \a s2, using the case
6651 sensitivity setting \a cs.
6652*/
6653
6654/*!
6655 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6656
6657 \since 5.12
6658 \overload compare()
6659
6660 Performs a comparison of this with \a s, using the case
6661 sensitivity setting \a cs.
6662*/
6663
6664/*!
6665 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6666
6667 \since 5.14
6668 \overload compare()
6669
6670 Performs a comparison of this with \a ch, using the case
6671 sensitivity setting \a cs.
6672*/
6673
6674/*!
6675 \overload compare()
6676 \since 4.2
6677
6678 Lexically compares this string with the string \a other and returns
6679 a negative integer if this string is less than \a other, a positive
6680 integer if it is greater than \a other, and zero if they are equal.
6681
6682 Same as compare(*this, \a other, \a cs).
6683*/
6684int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6685{
6686 return QtPrivate::compareStrings(lhs: *this, rhs: other, cs);
6687}
6688
6689/*!
6690 \internal
6691 \since 4.5
6692*/
6693int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6694 Qt::CaseSensitivity cs) noexcept
6695{
6696 Q_ASSERT(length1 >= 0);
6697 Q_ASSERT(length2 >= 0);
6698 Q_ASSERT(data1 || length1 == 0);
6699 Q_ASSERT(data2 || length2 == 0);
6700 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: QStringView(data2, length2), cs);
6701}
6702
6703/*!
6704 \overload compare()
6705 \since 4.2
6706
6707 Same as compare(*this, \a other, \a cs).
6708*/
6709int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6710{
6711 return QtPrivate::compareStrings(lhs: *this, rhs: other, cs);
6712}
6713
6714/*!
6715 \internal
6716 \since 5.0
6717*/
6718int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6719 Qt::CaseSensitivity cs) noexcept
6720{
6721 Q_ASSERT(length1 >= 0);
6722 Q_ASSERT(data1 || length1 == 0);
6723 if (!data2)
6724 return qt_lencmp(lhs: length1, rhs: 0);
6725 if (Q_UNLIKELY(length2 < 0))
6726 length2 = qsizetype(strlen(s: data2));
6727 return QtPrivate::compareStrings(lhs: QStringView(data1, length1),
6728 rhs: QUtf8StringView(data2, length2), cs);
6729}
6730
6731/*!
6732 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6733 \overload compare()
6734*/
6735
6736/*!
6737 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6738 \overload compare()
6739*/
6740
6741bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6742{
6743 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6744}
6745
6746Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6747{
6748 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6749 return Qt::compareThreeWay(lhs: res, rhs: 0);
6750}
6751
6752bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6753{
6754 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6755}
6756
6757Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6758{
6759 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6760 return Qt::compareThreeWay(lhs: res, rhs: 0);
6761}
6762
6763bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6764{
6765 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6766}
6767
6768Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6769{
6770 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6771 return Qt::compareThreeWay(lhs: res, rhs: 0);
6772}
6773
6774bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6775{
6776 return QtPrivate::equalStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6777}
6778
6779Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6780{
6781 const int res = QtPrivate::compareStrings(lhs: QUtf8StringView(lhs), rhs: QStringView(&rhs, 1));
6782 return Qt::compareThreeWay(lhs: res, rhs: 0);
6783}
6784
6785/*!
6786 \internal
6787 \since 6.8
6788*/
6789bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6790{
6791 return QtPrivate::equalStrings(lhs: QStringView(&lhs, 1), rhs: QUtf8StringView(rhs));
6792}
6793
6794int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6795{
6796 return QtPrivate::compareStrings(lhs: QStringView(&lhs, 1), rhs: QUtf8StringView(rhs));
6797}
6798
6799/*!
6800 \internal
6801 \since 6.8
6802*/
6803bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6804{
6805 Q_ASSERT(len >= 0);
6806 Q_ASSERT(data || len == 0);
6807 return QtPrivate::equalStrings(lhs: sv, rhs: QUtf8StringView(data, len));
6808}
6809
6810/*!
6811 \internal
6812 \since 6.8
6813*/
6814int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6815{
6816 Q_ASSERT(len >= 0);
6817 Q_ASSERT(data || len == 0);
6818 return QtPrivate::compareStrings(lhs: sv, rhs: QUtf8StringView(data, len));
6819}
6820
6821/*!
6822 \internal
6823 \since 6.8
6824*/
6825bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6826{
6827 // because qlatin1stringview.h can't include qutf8stringview.h
6828 Q_ASSERT(len >= 0);
6829 Q_ASSERT(s2 || len == 0);
6830 return QtPrivate::equalStrings(lhs: s1, rhs: QUtf8StringView(s2, len));
6831}
6832
6833/*!
6834 \internal
6835 \since 6.6
6836*/
6837int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6838{
6839 // because qlatin1stringview.h can't include qutf8stringview.h
6840 Q_ASSERT(len >= 0);
6841 Q_ASSERT(s2 || len == 0);
6842 return QtPrivate::compareStrings(lhs: s1, rhs: QUtf8StringView(s2, len));
6843}
6844
6845/*!
6846 \internal
6847 \since 4.5
6848*/
6849int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6850 Qt::CaseSensitivity cs) noexcept
6851{
6852 Q_ASSERT(length1 >= 0);
6853 Q_ASSERT(data1 || length1 == 0);
6854 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: s2, cs);
6855}
6856
6857/*!
6858 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6859
6860 Compares \a s1 with \a s2 and returns an integer less than, equal
6861 to, or greater than zero if \a s1 is less than, equal to, or
6862 greater than \a s2.
6863
6864 The comparison is performed in a locale- and also
6865 platform-dependent manner. Use this function to present sorted
6866 lists of strings to the user.
6867
6868 \sa compare(), QLocale, {Comparing Strings}
6869*/
6870
6871/*!
6872 \fn int QString::localeAwareCompare(QStringView other) const
6873 \since 6.0
6874 \overload localeAwareCompare()
6875
6876 Compares this string with the \a other string and returns an
6877 integer less than, equal to, or greater than zero if this string
6878 is less than, equal to, or greater than the \a other string.
6879
6880 The comparison is performed in a locale- and also
6881 platform-dependent manner. Use this function to present sorted
6882 lists of strings to the user.
6883
6884 Same as \c {localeAwareCompare(*this, other)}.
6885
6886 \sa {Comparing Strings}
6887*/
6888
6889/*!
6890 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6891 \since 6.0
6892 \overload localeAwareCompare()
6893
6894 Compares \a s1 with \a s2 and returns an integer less than, equal
6895 to, or greater than zero if \a s1 is less than, equal to, or
6896 greater than \a s2.
6897
6898 The comparison is performed in a locale- and also
6899 platform-dependent manner. Use this function to present sorted
6900 lists of strings to the user.
6901
6902 \sa {Comparing Strings}
6903*/
6904
6905
// Fallback values for the result codes that localeAwareCompare_helper()
// checks after calling CompareStringEx(). They are only defined here when
// no earlier header has already provided CSTR_LESS_THAN.
#if !defined(CSTR_LESS_THAN)
#define CSTR_LESS_THAN 1
#define CSTR_EQUAL 2
#define CSTR_GREATER_THAN 3
#endif
6911
6912/*!
6913 \overload localeAwareCompare()
6914
6915 Compares this string with the \a other string and returns an
6916 integer less than, equal to, or greater than zero if this string
6917 is less than, equal to, or greater than the \a other string.
6918
6919 The comparison is performed in a locale- and also
6920 platform-dependent manner. Use this function to present sorted
6921 lists of strings to the user.
6922
6923 Same as \c {localeAwareCompare(*this, other)}.
6924
6925 \sa {Comparing Strings}
6926*/
6927int QString::localeAwareCompare(const QString &other) const
6928{
6929 return localeAwareCompare_helper(data1: constData(), length1: size(), data2: other.constData(), length2: other.size());
6930}
6931
6932/*!
6933 \internal
6934 \since 4.5
6935*/
6936int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6937 const QChar *data2, qsizetype length2)
6938{
6939 Q_ASSERT(length1 >= 0);
6940 Q_ASSERT(data1 || length1 == 0);
6941 Q_ASSERT(length2 >= 0);
6942 Q_ASSERT(data2 || length2 == 0);
6943
6944 // do the right thing for null and empty
6945 if (length1 == 0 || length2 == 0)
6946 return QtPrivate::compareStrings(lhs: QStringView(data1, length1), rhs: QStringView(data2, length2),
6947 cs: Qt::CaseSensitive);
6948
6949#if QT_CONFIG(icu)
6950 return QCollator::defaultCompare(s1: QStringView(data1, length1), s2: QStringView(data2, length2));
6951#else
6952 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6953 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6954# if defined(Q_OS_WIN)
6955 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6956
6957 switch (res) {
6958 case CSTR_LESS_THAN:
6959 return -1;
6960 case CSTR_GREATER_THAN:
6961 return 1;
6962 default:
6963 return 0;
6964 }
6965# elif defined (Q_OS_DARWIN)
6966 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6967 // strings the same way as native applications do, and also respects
6968 // the "Order for sorted lists" setting in the International preferences
6969 // panel.
6970 const CFStringRef thisString =
6971 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6972 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6973 const CFStringRef otherString =
6974 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6975 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6976
6977 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6978 CFRelease(thisString);
6979 CFRelease(otherString);
6980 return result;
6981# elif defined(Q_OS_UNIX)
6982 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6983 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6984# else
6985# error "This case shouldn't happen"
6986 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6987# endif
6988#endif // !QT_CONFIG(icu)
6989}
6990
6991
6992/*!
6993 \fn const QChar *QString::unicode() const
6994
6995 Returns a Unicode representation of the string.
6996 The result remains valid until the string is modified.
6997
6998 \note The returned string may not be '\\0'-terminated.
6999 Use size() to determine the length of the array.
7000
7001 \sa utf16(), fromRawData()
7002*/
7003
7004/*!
7005 \fn const ushort *QString::utf16() const
7006
    Returns the QString as a '\\0'-terminated array of unsigned
7008 shorts. The result remains valid until the string is modified.
7009
7010 The returned string is in host byte order.
7011
7012 \sa unicode()
7013*/
7014
7015const ushort *QString::utf16() const
7016{
7017 if (!d->isMutable()) {
7018 // ensure '\0'-termination for ::fromRawData strings
7019 const_cast<QString*>(this)->reallocData(alloc: d.size, option: QArrayData::KeepSize);
7020 }
7021 return reinterpret_cast<const ushort *>(d.data());
7022}
7023
7024/*!
7025 Returns a string of size \a width that contains this string
7026 padded by the \a fill character.
7027
7028 If \a truncate is \c false and the size() of the string is more than
7029 \a width, then the returned string is a copy of the string.
7030
7031 \snippet qstring/main.cpp 32
7032
7033 If \a truncate is \c true and the size() of the string is more than
7034 \a width, then any characters in a copy of the string after
7035 position \a width are removed, and the copy is returned.
7036
7037 \snippet qstring/main.cpp 33
7038
7039 \sa rightJustified()
7040*/
7041
7042QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7043{
7044 QString result;
7045 qsizetype len = size();
7046 qsizetype padlen = width - len;
7047 if (padlen > 0) {
7048 result.resize(size: len+padlen);
7049 if (len)
7050 memcpy(dest: result.d.data(), src: d.data(), n: sizeof(QChar)*len);
7051 QChar *uc = (QChar*)result.d.data() + len;
7052 while (padlen--)
7053 * uc++ = fill;
7054 } else {
7055 if (truncate)
7056 result = left(n: width);
7057 else
7058 result = *this;
7059 }
7060 return result;
7061}
7062
7063/*!
    Returns a string of size \a width that contains the \a fill
7065 character followed by the string. For example:
7066
7067 \snippet qstring/main.cpp 49
7068
7069 If \a truncate is \c false and the size() of the string is more than
7070 \a width, then the returned string is a copy of the string.
7071
    If \a truncate is \c true and the size() of the string is more than
7073 \a width, then the resulting string is truncated at position \a
7074 width.
7075
7076 \snippet qstring/main.cpp 50
7077
7078 \sa leftJustified()
7079*/
7080
7081QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7082{
7083 QString result;
7084 qsizetype len = size();
7085 qsizetype padlen = width - len;
7086 if (padlen > 0) {
7087 result.resize(size: len+padlen);
7088 QChar *uc = (QChar*)result.d.data();
7089 while (padlen--)
7090 * uc++ = fill;
7091 if (len)
7092 memcpy(dest: static_cast<void *>(uc), src: static_cast<const void *>(d.data()), n: sizeof(QChar)*len);
7093 } else {
7094 if (truncate)
7095 result = left(n: width);
7096 else
7097 result = *this;
7098 }
7099 return result;
7100}
7101
7102/*!
7103 \fn QString QString::toLower() const
7104
7105 Returns a lowercase copy of the string.
7106
7107 \snippet qstring/main.cpp 75
7108
    The case conversion will always happen in the 'C' locale. For
    locale-dependent case folding use QLocale::toLower().
7111
7112 \sa toUpper(), QLocale::toLower()
7113*/
7114
namespace QUnicodeTables {
/*
    \internal
    Converts the \a str string starting from the position pointed to by the \a
    it iterator, using the case conversion selected by \a which, and returns
    the result. The input string must not be empty (the convertCase function
    below guarantees that).

    The string type \c{T} is also a template and is either \c{const QString} or
    \c{QString}. This function can do both copy-conversion and in-place
    conversion depending on the state of the \a str parameter:
    \list
       \li \c{T} is \c{const QString}: copy-convert
       \li \c{T} is \c{QString} and its refcount != 1: copy-convert
       \li \c{T} is \c{QString} and its refcount == 1: in-place convert
    \endlist

    In copy-convert mode, the local variable \c{s} is detached from the input
    \a str. In the in-place convert mode, \a str is in moved-from state and
    \c{s} contains the only copy of the string, without reallocation (thus,
    \a it is still valid).

    There is one pathological case left: when the in-place conversion needs to
    reallocate memory to grow the buffer. In that case, we need to adjust the \a
    it pointer.
 */
template <typename T>
Q_NEVER_INLINE
static QString detachAndConvertCase(T &str, QStringIterator it, QUnicodeTables::Case which)
{
    Q_ASSERT(!str.isEmpty());
    QString s = std::move(str);             // will copy if T is const QString
    // pp is the write cursor; it starts at the same offset as the read iterator
    QChar *pp = s.begin() + it.index();     // will detach if necessary

    do {
        const auto folded = fullConvertCase(uc: it.next(), which);
        if (Q_UNLIKELY(folded.size() > 1)) {
            if (folded.chars[0] == *pp && folded.size() == 2) {
                // special case: only second actually changed (e.g. surrogate pairs),
                // avoid slow case
                ++pp;
                *pp++ = folded.chars[1];
            } else {
                // slow path: the string is growing
                // remember both positions as indices, since replace() may
                // move the buffer and invalidate pp
                qsizetype inpos = it.index() - 1;
                qsizetype outpos = pp - s.constBegin();

                s.replace(pos: outpos, len: 1, after: reinterpret_cast<const QChar *>(folded.data()), alen: folded.size());
                // re-derive the write cursor, positioned just past the inserted characters
                pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();

                // Adjust the input iterator if we are performing an in-place conversion
                if constexpr (!std::is_const<T>::value)
                    it = QStringIterator(s.constBegin(), inpos + folded.size(), s.constEnd());
            }
        } else {
            // common case: one code unit in, one code unit out
            *pp++ = folded.chars[0];
        }
    } while (it.hasNext());

    return s;
}

/*
    \internal
    Scans \a str for the first code point whose case properties for \a which
    report a non-zero \c diff. If one is found, the iterator is stepped back
    onto it and the rest of the work is delegated to detachAndConvertCase().
    If none is found, \a str is returned via std::move() (which copies when
    \c{T} is \c{const QString}).
 */
template <typename T>
static QString convertCase(T &str, QUnicodeTables::Case which)
{
    const QChar *p = str.constBegin();
    const QChar *e = p + str.size();

    // this avoids out of bounds check in the loop
    // (trailing high surrogates are excluded from the scanned range)
    while (e != p && e[-1].isHighSurrogate())
        --e;

    QStringIterator it(p, e);
    while (it.hasNext()) {
        const char32_t uc = it.next();
        if (qGetProp(ucs4: uc)->cases[which].diff) {
            // step back so that the conversion starts at this code point
            it.recede();
            return detachAndConvertCase(str, it, which);
        }
    }
    return std::move(str);
}
} // namespace QUnicodeTables
7198
7199QString QString::toLower_helper(const QString &str)
7200{
7201 return QUnicodeTables::convertCase(str, which: QUnicodeTables::LowerCase);
7202}
7203
7204QString QString::toLower_helper(QString &str)
7205{
7206 return QUnicodeTables::convertCase(str, which: QUnicodeTables::LowerCase);
7207}
7208
7209/*!
7210 \fn QString QString::toCaseFolded() const
7211
7212 Returns the case folded equivalent of the string. For most Unicode
7213 characters this is the same as toLower().
7214*/
7215
7216QString QString::toCaseFolded_helper(const QString &str)
7217{
7218 return QUnicodeTables::convertCase(str, which: QUnicodeTables::CaseFold);
7219}
7220
7221QString QString::toCaseFolded_helper(QString &str)
7222{
7223 return QUnicodeTables::convertCase(str, which: QUnicodeTables::CaseFold);
7224}
7225
7226/*!
7227 \fn QString QString::toUpper() const
7228
7229 Returns an uppercase copy of the string.
7230
7231 \snippet qstring/main.cpp 81
7232
7233 The case conversion will always happen in the 'C' locale. For
7234 locale-dependent case folding use QLocale::toUpper().
7235
7236 \note In some cases the uppercase form of a string may be longer than the
7237 original.
7238
    \sa toLower(), QLocale::toUpper()
7240*/
7241
7242QString QString::toUpper_helper(const QString &str)
7243{
7244 return QUnicodeTables::convertCase(str, which: QUnicodeTables::UpperCase);
7245}
7246
7247QString QString::toUpper_helper(QString &str)
7248{
7249 return QUnicodeTables::convertCase(str, which: QUnicodeTables::UpperCase);
7250}
7251
7252/*!
7253 \since 5.5
7254
7255 Safely builds a formatted string from the format string \a cformat
7256 and an arbitrary list of arguments.
7257
7258 The format string supports the conversion specifiers, length modifiers,
7259 and flags provided by printf() in the standard C++ library. The \a cformat
7260 string and \c{%s} arguments must be UTF-8 encoded.
7261
7262 \note The \c{%lc} escape sequence expects a unicode character of type
7263 \c char16_t, or \c ushort (as returned by QChar::unicode()).
7264 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7265 of unicode characters of type \c char16_t, or ushort (as returned by
7266 QString::utf16()). This is at odds with the printf() in the standard C++
7267 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7268 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7269 where the size of \c {wchar_t} is not 16 bits.
7270
7271 \warning We do not recommend using QString::asprintf() in new Qt
7272 code. Instead, consider using QTextStream or arg(), both of
7273 which support Unicode strings seamlessly and are type-safe.
7274 Here is an example that uses QTextStream:
7275
7276 \snippet qstring/main.cpp 64
7277
    For \l {QObject::tr()}{translations}, especially if the string
    contains more than one escape sequence, you should consider using
7280 the arg() function instead. This allows the order of the
7281 replacements to be controlled by the translator.
7282
7283 \sa arg()
7284*/
7285
7286QString QString::asprintf(const char *cformat, ...)
7287{
7288 va_list ap;
7289 va_start(ap, cformat);
7290 const QString s = vasprintf(format: cformat, ap);
7291 va_end(ap);
7292 return s;
7293}
7294
7295static void append_utf8(QString &qs, const char *cs, qsizetype len)
7296{
7297 const qsizetype oldSize = qs.size();
7298 qs.resize(size: oldSize + len);
7299 const QChar *newEnd = QUtf8::convertToUnicode(buffer: qs.data() + oldSize, in: QByteArrayView(cs, len));
7300 qs.resize(size: newEnd - qs.constData());
7301}
7302
7303static uint parse_flag_characters(const char * &c) noexcept
7304{
7305 uint flags = QLocaleData::ZeroPadExponent;
7306 while (true) {
7307 switch (*c) {
7308 case '#':
7309 flags |= QLocaleData::ShowBase | QLocaleData::AddTrailingZeroes
7310 | QLocaleData::ForcePoint;
7311 break;
7312 case '0': flags |= QLocaleData::ZeroPadded; break;
7313 case '-': flags |= QLocaleData::LeftAdjusted; break;
7314 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7315 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7316 case '\'': flags |= QLocaleData::GroupDigits; break;
7317 default: return flags;
7318 }
7319 ++c;
7320 }
7321}
7322
7323static int parse_field_width(const char *&c, qsizetype size)
7324{
7325 Q_ASSERT(isAsciiDigit(*c));
7326 const char *const stop = c + size;
7327
7328 // can't be negative - started with a digit
7329 // contains at least one digit
7330 auto [result, used] = qstrntoull(nptr: c, size, base: 10);
7331 c += used;
7332 if (used <= 0)
7333 return false;
7334 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7335 while (c < stop && isAsciiDigit(c: *c))
7336 ++c;
7337 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7338}
7339
// Length modifiers of a printf-style conversion, as returned by
// parse_length_modifier(); lm_none means that no modifier was present.
enum LengthMod { lm_none, lm_hh, lm_h, lm_l, lm_ll, lm_L, lm_j, lm_z, lm_t };
7341
// Advances \a c by one and returns true if it currently points at \a ch;
// otherwise leaves \a c alone and returns false.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7350
7351static LengthMod parse_length_modifier(const char * &c) noexcept
7352{
7353 switch (*c++) {
7354 case 'h': return can_consume(c, ch: 'h') ? lm_hh : lm_h;
7355 case 'l': return can_consume(c, ch: 'l') ? lm_ll : lm_l;
7356 case 'L': return lm_L;
7357 case 'j': return lm_j;
7358 case 'z':
7359 case 'Z': return lm_z;
7360 case 't': return lm_t;
7361 }
7362 --c; // don't consume *c - it wasn't a flag
7363 return lm_none;
7364}
7365
7366/*!
7367 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7368 \since 5.5
7369
    Equivalent method to asprintf(), but takes a va_list \a ap
    instead of a list of variable arguments. See the asprintf()
    documentation for an explanation of \a cformat.

    This method does not call the va_end macro; the caller
    is responsible for calling va_end on \a ap.
7376
7377 \sa asprintf()
7378*/
7379
// Formats \a cformat into a QString, reading the arguments from \a ap.
//
// The format is processed left to right: literal text is appended as UTF-8,
// and each '%' escape is parsed as flags, field width, precision, length
// modifier and conversion specifier, in that order. Escapes that end
// prematurely or use an unknown conversion are copied to the output as text.
// A null or empty \a cformat yields an empty string.
QString QString::vasprintf(const char *cformat, va_list ap)
{
    if (!cformat || !*cformat) {
        // Qt 1.x compat
        return fromLatin1(ba: "");
    }

    // Parse cformat

    QString result;
    const char *c = cformat;
    // end of the format string; bounds the digit scan in parse_field_width()
    const char *formatEnd = cformat + qstrlen(str: cformat);
    for (;;) {
        // Copy non-escape chars to result
        const char *cb = c;
        while (*c != '\0' && *c != '%')
            c++;
        append_utf8(qs&: result, cs: cb, len: qsizetype(c - cb));

        if (*c == '\0')
            break;

        // Found '%'
        // (remembered so that a broken escape can be emitted verbatim)
        const char *escape_start = c;
        ++c;

        if (*c == '\0') {
            result.append(ch: u'%'); // a % at the end of the string - treat as non-escape text
            break;
        }
        if (*c == '%') {
            result.append(ch: u'%'); // %%
            ++c;
            continue;
        }

        uint flags = parse_flag_characters(c);

        if (*c == '\0') {
            result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse field width
        int width = -1; // -1 means unspecified
        if (isAsciiDigit(c: *c)) {
            width = parse_field_width(c, size: formatEnd - c);
        } else if (*c == '*') { // can't parse this in another function, not portably, at least
            width = va_arg(ap, int);
            if (width < 0)
                width = -1; // treat all negative numbers as unspecified
            ++c;
        }

        if (*c == '\0') {
            result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse precision
        int precision = -1; // -1 means unspecified
        if (*c == '.') {
            ++c;
            // a '.' that is not followed by digits or '*' leaves the precision at 0
            precision = 0;
            if (isAsciiDigit(c: *c)) {
                precision = parse_field_width(c, size: formatEnd - c);
            } else if (*c == '*') { // can't parse this in another function, not portably, at least
                precision = va_arg(ap, int);
                if (precision < 0)
                    precision = -1; // treat all negative numbers as unspecified
                ++c;
            }
        }

        if (*c == '\0') {
            result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        const LengthMod length_mod = parse_length_modifier(c);

        if (*c == '\0') {
            result.append(str: QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse the conversion specifier and do the conversion
        QString subst;
        switch (*c) {
            case 'd':
            case 'i': {
                qint64 i;
                // hh and h arguments are read as int, like unmodified ones
                switch (length_mod) {
                    case lm_none: i = va_arg(ap, int); break;
                    case lm_hh: i = va_arg(ap, int); break;
                    case lm_h: i = va_arg(ap, int); break;
                    case lm_l: i = va_arg(ap, long int); break;
                    case lm_ll: i = va_arg(ap, qint64); break;
                    // NOTE(review): printf() defines the 'j' modifier as intmax_t;
                    // reading it as long int looks wrong where long is narrower
                    // than intmax_t - confirm
                    case lm_j: i = va_arg(ap, long int); break;

                    /* ptrdiff_t actually, but it should be the same for us */
                    case lm_z: i = va_arg(ap, qsizetype); break;
                    case lm_t: i = va_arg(ap, qsizetype); break;
                    // any other modifier: nothing is read from ap, 0 is printed
                    default: i = 0; break;
                }
                subst = QLocaleData::c()->longLongToString(l: i, precision, base: 10, width, flags);
                ++c;
                break;
            }
            case 'o':
            case 'u':
            case 'x':
            case 'X': {
                quint64 u;
                // NOTE(review): there is no lm_j case here, so e.g. "%ju" takes
                // the default branch: it prints 0 and does not read its argument
                // from ap - confirm whether that is intended
                switch (length_mod) {
                    case lm_none: u = va_arg(ap, uint); break;
                    case lm_hh: u = va_arg(ap, uint); break;
                    case lm_h: u = va_arg(ap, uint); break;
                    case lm_l: u = va_arg(ap, ulong); break;
                    case lm_ll: u = va_arg(ap, quint64); break;
                    case lm_t: u = va_arg(ap, size_t); break;
                    case lm_z: u = va_arg(ap, size_t); break;
                    default: u = 0; break;
                }

                if (isAsciiUpper(c: *c))
                    flags |= QLocaleData::CapitalEorX;

                int base = 10;
                switch (QtMiscUtils::toAsciiLower(ch: *c)) {
                    case 'o':
                        base = 8; break;
                    case 'u':
                        base = 10; break;
                    case 'x':
                        base = 16; break;
                    default: break;
                }
                subst = QLocaleData::c()->unsLongLongToString(l: u, precision, base, width, flags);
                ++c;
                break;
            }
            case 'E':
            case 'e':
            case 'F':
            case 'f':
            case 'G':
            case 'g':
            case 'A':
            case 'a': {
                double d;
                if (length_mod == lm_L)
                    d = va_arg(ap, long double); // not supported - converted to a double
                else
                    d = va_arg(ap, double);

                if (isAsciiUpper(c: *c))
                    flags |= QLocaleData::CapitalEorX;

                QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
                switch (QtMiscUtils::toAsciiLower(ch: *c)) {
                    case 'e': form = QLocaleData::DFExponent; break;
                    case 'a':                             // not supported - decimal form used instead
                    case 'f': form = QLocaleData::DFDecimal; break;
                    case 'g': form = QLocaleData::DFSignificantDigits; break;
                    default: break;
                }
                subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
                ++c;
                break;
            }
            case 'c': {
                // %lc: the int argument is passed to QChar::fromUcs2();
                // %c: it is cast to uchar and used as a Latin-1 character
                if (length_mod == lm_l)
                    subst = QChar::fromUcs2(va_arg(ap, int));
                else
                    subst = QLatin1Char((uchar) va_arg(ap, int));
                ++c;
                break;
            }
            case 's': {
                if (length_mod == lm_l) {
                    // %ls: zero-terminated array of 16-bit units, cut off after
                    // 'precision' units (a negative precision never reaches 0
                    // here, so only the terminator stops the scan)
                    const ushort *buff = va_arg(ap, const ushort*);
                    const ushort *ch = buff;
                    while (precision != 0 && *ch != 0) {
                        ++ch;
                        --precision;
                    }
                    subst.setUtf16(autf16: buff, asize: ch - buff);
                } else if (precision == -1) {
                    // %s without precision: the whole zero-terminated UTF-8 string
                    subst = QString::fromUtf8(va_arg(ap, const char*));
                } else {
                    // %.Ns: at most 'precision' bytes of the UTF-8 string
                    const char *buff = va_arg(ap, const char*);
                    subst = QString::fromUtf8(utf8: buff, size: qstrnlen(str: buff, maxlen: precision));
                }
                ++c;
                break;
            }
            case 'p': {
                // pointers are printed in base 16 with ShowBase set
                void *arg = va_arg(ap, void*);
                const quint64 i = reinterpret_cast<quintptr>(arg);
                flags |= QLocaleData::ShowBase;
                subst = QLocaleData::c()->unsLongLongToString(l: i, precision, base: 16, width, flags);
                ++c;
                break;
            }
            case 'n':
                // %n: store the current size of the result through the pointer
                // argument, whose pointee type is chosen by the length modifier
                switch (length_mod) {
                    case lm_hh: {
                        signed char *n = va_arg(ap, signed char*);
                        *n = result.size();
                        break;
                    }
                    case lm_h: {
                        short int *n = va_arg(ap, short int*);
                        *n = result.size();
                        break;
                    }
                    case lm_l: {
                        long int *n = va_arg(ap, long int*);
                        *n = result.size();
                        break;
                    }
                    case lm_ll: {
                        qint64 *n = va_arg(ap, qint64*);
                        *n = result.size();
                        break;
                    }
                    default: {
                        int *n = va_arg(ap, int*);
                        *n = int(result.size());
                        break;
                    }
                }
                ++c;
                break;

            default: // bad escape, treat as non-escape text
                // copies everything from the '%' up to, but not including, the
                // unknown conversion character; that character is then picked
                // up as literal text by the next loop iteration
                for (const char *cc = escape_start; cc != c; ++cc)
                    result.append(ch: QLatin1Char(*cc));
                continue;
        }

        // Pad the converted text to the field width on the requested side.
        if (flags & QLocaleData::LeftAdjusted)
            result.append(str: subst.leftJustified(width));
        else
            result.append(str: subst.rightJustified(width));
    }

    return result;
}
7630
7631/*!
7632 \fn QString::toLongLong(bool *ok, int base) const
7633
7634 Returns the string converted to a \c{long long} using base \a
7635 base, which is 10 by default and must be between 2 and 36, or 0.
7636 Returns 0 if the conversion fails.
7637
7638 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7639 to \c false, and success by setting *\a{ok} to \c true.
7640
7641 If \a base is 0, the C language convention is used: if the string begins
7642 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7643 2 is used; otherwise, if the string begins with "0", base 8 is used;
7644 otherwise, base 10 is used.
7645
7646 The string conversion will always happen in the 'C' locale. For
7647 locale-dependent conversion use QLocale::toLongLong()
7648
7649 Example:
7650
7651 \snippet qstring/main.cpp 74
7652
7653 This function ignores leading and trailing whitespace.
7654
7655 \note Support for the "0b" prefix was added in Qt 6.4.
7656
7657 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7658*/
7659
7660template <typename Int>
7661static Int toIntegral(QStringView string, bool *ok, int base)
7662{
7663#if defined(QT_CHECK_RANGE)
7664 if (base != 0 && (base < 2 || base > 36)) {
7665 qWarning("QString::toIntegral: Invalid base (%d)", base);
7666 base = 10;
7667 }
7668#endif
7669
7670 QVarLengthArray<uchar> latin1(string.size());
7671 qt_to_latin1(dst: latin1.data(), src: string.utf16(), length: string.size());
7672 QSimpleParsedNumber<Int> r;
7673 if constexpr (std::is_signed_v<Int>)
7674 r = QLocaleData::bytearrayToLongLong(num: latin1, base);
7675 else
7676 r = QLocaleData::bytearrayToUnsLongLong(num: latin1, base);
7677 if (ok)
7678 *ok = r.ok();
7679 return r.result;
7680}
7681
7682qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7683{
7684 return toIntegral<qlonglong>(string, ok, base);
7685}
7686
7687/*!
7688 \fn QString::toULongLong(bool *ok, int base) const
7689
7690 Returns the string converted to an \c{unsigned long long} using base \a
7691 base, which is 10 by default and must be between 2 and 36, or 0.
7692 Returns 0 if the conversion fails.
7693
7694 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7695 to \c false, and success by setting *\a{ok} to \c true.
7696
7697 If \a base is 0, the C language convention is used: if the string begins
7698 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7699 2 is used; otherwise, if the string begins with "0", base 8 is used;
7700 otherwise, base 10 is used.
7701
7702 The string conversion will always happen in the 'C' locale. For
7703 locale-dependent conversion use QLocale::toULongLong()
7704
7705 Example:
7706
7707 \snippet qstring/main.cpp 79
7708
7709 This function ignores leading and trailing whitespace.
7710
7711 \note Support for the "0b" prefix was added in Qt 6.4.
7712
7713 \sa number(), toLongLong(), QLocale::toULongLong()
7714*/
7715
7716qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7717{
7718 return toIntegral<qulonglong>(string, ok, base);
7719}
7720
7721/*!
7722 \fn long QString::toLong(bool *ok, int base) const
7723
7724 Returns the string converted to a \c long using base \a
7725 base, which is 10 by default and must be between 2 and 36, or 0.
7726 Returns 0 if the conversion fails.
7727
7728 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7729 to \c false, and success by setting *\a{ok} to \c true.
7730
7731 If \a base is 0, the C language convention is used: if the string begins
7732 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7733 2 is used; otherwise, if the string begins with "0", base 8 is used;
7734 otherwise, base 10 is used.
7735
7736 The string conversion will always happen in the 'C' locale. For
7737 locale-dependent conversion use QLocale::toLongLong()
7738
7739 Example:
7740
7741 \snippet qstring/main.cpp 73
7742
7743 This function ignores leading and trailing whitespace.
7744
7745 \note Support for the "0b" prefix was added in Qt 6.4.
7746
7747 \sa number(), toULong(), toInt(), QLocale::toInt()
7748*/
7749
7750/*!
7751 \fn ulong QString::toULong(bool *ok, int base) const
7752
7753 Returns the string converted to an \c{unsigned long} using base \a
7754 base, which is 10 by default and must be between 2 and 36, or 0.
7755 Returns 0 if the conversion fails.
7756
7757 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7758 to \c false, and success by setting *\a{ok} to \c true.
7759
7760 If \a base is 0, the C language convention is used: if the string begins
7761 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7762 2 is used; otherwise, if the string begins with "0", base 8 is used;
7763 otherwise, base 10 is used.
7764
7765 The string conversion will always happen in the 'C' locale. For
7766 locale-dependent conversion use QLocale::toULongLong()
7767
7768 Example:
7769
7770 \snippet qstring/main.cpp 78
7771
7772 This function ignores leading and trailing whitespace.
7773
7774 \note Support for the "0b" prefix was added in Qt 6.4.
7775
7776 \sa number(), QLocale::toUInt()
7777*/
7778
7779/*!
7780 \fn int QString::toInt(bool *ok, int base) const
7781 Returns the string converted to an \c int using base \a
7782 base, which is 10 by default and must be between 2 and 36, or 0.
7783 Returns 0 if the conversion fails.
7784
7785 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7786 to \c false, and success by setting *\a{ok} to \c true.
7787
7788 If \a base is 0, the C language convention is used: if the string begins
7789 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7790 2 is used; otherwise, if the string begins with "0", base 8 is used;
7791 otherwise, base 10 is used.
7792
7793 The string conversion will always happen in the 'C' locale. For
7794 locale-dependent conversion use QLocale::toInt()
7795
7796 Example:
7797
7798 \snippet qstring/main.cpp 72
7799
7800 This function ignores leading and trailing whitespace.
7801
7802 \note Support for the "0b" prefix was added in Qt 6.4.
7803
7804 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7805*/
7806
7807/*!
7808 \fn uint QString::toUInt(bool *ok, int base) const
7809 Returns the string converted to an \c{unsigned int} using base \a
7810 base, which is 10 by default and must be between 2 and 36, or 0.
7811 Returns 0 if the conversion fails.
7812
7813 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7814 to \c false, and success by setting *\a{ok} to \c true.
7815
7816 If \a base is 0, the C language convention is used: if the string begins
7817 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7818 2 is used; otherwise, if the string begins with "0", base 8 is used;
7819 otherwise, base 10 is used.
7820
7821 The string conversion will always happen in the 'C' locale. For
7822 locale-dependent conversion use QLocale::toUInt()
7823
7824 Example:
7825
7826 \snippet qstring/main.cpp 77
7827
7828 This function ignores leading and trailing whitespace.
7829
7830 \note Support for the "0b" prefix was added in Qt 6.4.
7831
7832 \sa number(), toInt(), QLocale::toUInt()
7833*/
7834
7835/*!
7836 \fn short QString::toShort(bool *ok, int base) const
7837
7838 Returns the string converted to a \c short using base \a
7839 base, which is 10 by default and must be between 2 and 36, or 0.
7840 Returns 0 if the conversion fails.
7841
7842 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7843 to \c false, and success by setting *\a{ok} to \c true.
7844
7845 If \a base is 0, the C language convention is used: if the string begins
7846 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7847 2 is used; otherwise, if the string begins with "0", base 8 is used;
7848 otherwise, base 10 is used.
7849
7850 The string conversion will always happen in the 'C' locale. For
7851 locale-dependent conversion use QLocale::toShort()
7852
7853 Example:
7854
7855 \snippet qstring/main.cpp 76
7856
7857 This function ignores leading and trailing whitespace.
7858
7859 \note Support for the "0b" prefix was added in Qt 6.4.
7860
7861 \sa number(), toUShort(), toInt(), QLocale::toShort()
7862*/
7863
7864/*!
7865 \fn ushort QString::toUShort(bool *ok, int base) const
7866
7867 Returns the string converted to an \c{unsigned short} using base \a
7868 base, which is 10 by default and must be between 2 and 36, or 0.
7869 Returns 0 if the conversion fails.
7870
7871 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7872 to \c false, and success by setting *\a{ok} to \c true.
7873
7874 If \a base is 0, the C language convention is used: if the string begins
7875 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7876 2 is used; otherwise, if the string begins with "0", base 8 is used;
7877 otherwise, base 10 is used.
7878
7879 The string conversion will always happen in the 'C' locale. For
7880 locale-dependent conversion use QLocale::toUShort()
7881
7882 Example:
7883
7884 \snippet qstring/main.cpp 80
7885
7886 This function ignores leading and trailing whitespace.
7887
7888 \note Support for the "0b" prefix was added in Qt 6.4.
7889
7890 \sa number(), toShort(), QLocale::toUShort()
7891*/
7892
7893/*!
7894 Returns the string converted to a \c double value.
7895
7896 Returns an infinity if the conversion overflows or 0.0 if the
7897 conversion fails for other reasons (e.g. underflow).
7898
7899 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7900 to \c false, and success by setting *\a{ok} to \c true.
7901
7902 \snippet qstring/main.cpp 66
7903
    \warning The QString content may only contain valid numerical characters,
    which include the plus/minus sign, the character e used in scientific
    notation, and the decimal point. Including the unit or additional characters
    leads to a conversion error.
7908
7909 \snippet qstring/main.cpp 67
7910
7911 The string conversion will always happen in the 'C' locale. For
7912 locale-dependent conversion use QLocale::toDouble()
7913
7914 \snippet qstring/main.cpp 68
7915
7916 For historical reasons, this function does not handle
7917 thousands group separators. If you need to convert such numbers,
7918 use QLocale::toDouble().
7919
7920 \snippet qstring/main.cpp 69
7921
7922 This function ignores leading and trailing whitespace.
7923
7924 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7925*/
7926
7927double QString::toDouble(bool *ok) const
7928{
7929 return QStringView(*this).toDouble(ok);
7930}
7931
7932double QStringView::toDouble(bool *ok) const
7933{
7934 QStringView string = qt_trimmed(s: *this);
7935 QVarLengthArray<uchar> latin1(string.size());
7936 qt_to_latin1(dst: latin1.data(), src: string.utf16(), length: string.size());
7937 auto r = qt_asciiToDouble(num: reinterpret_cast<const char *>(latin1.data()), numLen: string.size());
7938 if (ok != nullptr)
7939 *ok = r.ok();
7940 return r.result;
7941}
7942
7943/*!
7944 Returns the string converted to a \c float value.
7945
7946 Returns an infinity if the conversion overflows or 0.0 if the
7947 conversion fails for other reasons (e.g. underflow).
7948
7949 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7950 to \c false, and success by setting *\a{ok} to \c true.
7951
    \warning The QString content may only contain valid numerical characters,
    which include the plus/minus sign, the character e used in scientific
    notation, and the decimal point. Including the unit or additional characters
    leads to a conversion error.
7956
7957 The string conversion will always happen in the 'C' locale. For
7958 locale-dependent conversion use QLocale::toFloat()
7959
7960 For historical reasons, this function does not handle
7961 thousands group separators. If you need to convert such numbers,
7962 use QLocale::toFloat().
7963
7964 Example:
7965
7966 \snippet qstring/main.cpp 71
7967
7968 This function ignores leading and trailing whitespace.
7969
7970 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
7971*/
7972
7973float QString::toFloat(bool *ok) const
7974{
7975 return QLocaleData::convertDoubleToFloat(d: toDouble(ok), ok);
7976}
7977
7978float QStringView::toFloat(bool *ok) const
7979{
7980 return QLocaleData::convertDoubleToFloat(d: toDouble(ok), ok);
7981}
7982
7983/*! \fn QString &QString::setNum(int n, int base)
7984
7985 Sets the string to the printed value of \a n in the specified \a
7986 base, and returns a reference to the string.
7987
7988 The base is 10 by default and must be between 2 and 36.
7989
7990 \snippet qstring/main.cpp 56
7991
7992 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7993 To get a localized string representation of a number, use
7994 QLocale::toString() with the appropriate locale.
7995
7996 \sa number()
7997*/
7998
7999/*! \fn QString &QString::setNum(uint n, int base)
8000
8001 \overload
8002*/
8003
8004/*! \fn QString &QString::setNum(long n, int base)
8005
8006 \overload
8007*/
8008
8009/*! \fn QString &QString::setNum(ulong n, int base)
8010
8011 \overload
8012*/
8013
8014/*!
8015 \overload
8016*/
8017QString &QString::setNum(qlonglong n, int base)
8018{
8019 return *this = number(n, base);
8020}
8021
8022/*!
8023 \overload
8024*/
8025QString &QString::setNum(qulonglong n, int base)
8026{
8027 return *this = number(n, base);
8028}
8029
8030/*! \fn QString &QString::setNum(short n, int base)
8031
8032 \overload
8033*/
8034
8035/*! \fn QString &QString::setNum(ushort n, int base)
8036
8037 \overload
8038*/
8039
8040/*!
8041 \overload
8042
8043 Sets the string to the printed value of \a n, formatted according to the
8044 given \a format and \a precision, and returns a reference to the string.
8045
8046 \sa number(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8047*/
8048
8049QString &QString::setNum(double n, char format, int precision)
8050{
8051 return *this = number(n, format, precision);
8052}
8053
8054/*!
8055 \fn QString &QString::setNum(float n, char format, int precision)
8056 \overload
8057
8058 Sets the string to the printed value of \a n, formatted according
8059 to the given \a format and \a precision, and returns a reference
8060 to the string.
8061
8062 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8063 To get a localized string representation of a number, use
8064 QLocale::toString() with the appropriate locale.
8065
8066 \sa number()
8067*/
8068
8069
8070/*!
8071 \fn QString QString::number(long n, int base)
8072
8073 Returns a string equivalent of the number \a n according to the
8074 specified \a base.
8075
8076 The base is 10 by default and must be between 2
8077 and 36. For bases other than 10, \a n is treated as an
8078 unsigned integer.
8079
8080 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8081 To get a localized string representation of a number, use
8082 QLocale::toString() with the appropriate locale.
8083
8084 \snippet qstring/main.cpp 35
8085
8086 \sa setNum()
8087*/
8088
8089QString QString::number(long n, int base)
8090{
8091 return number(qlonglong(n), base);
8092}
8093
8094/*!
8095 \fn QString QString::number(ulong n, int base)
8096
8097 \overload
8098*/
8099QString QString::number(ulong n, int base)
8100{
8101 return number(qulonglong(n), base);
8102}
8103
8104/*!
8105 \overload
8106*/
8107QString QString::number(int n, int base)
8108{
8109 return number(qlonglong(n), base);
8110}
8111
8112/*!
8113 \overload
8114*/
8115QString QString::number(uint n, int base)
8116{
8117 return number(qulonglong(n), base);
8118}
8119
8120/*!
8121 \overload
8122*/
8123QString QString::number(qlonglong n, int base)
8124{
8125#if defined(QT_CHECK_RANGE)
8126 if (base < 2 || base > 36) {
8127 qWarning("QString::setNum: Invalid base (%d)", base);
8128 base = 10;
8129 }
8130#endif
8131 bool negative = n < 0;
8132 /*
8133 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8134 taking an absolute value has to take a slight detour.
8135 */
8136 return qulltoBasicLatin(l: negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8137}
8138
8139/*!
8140 \overload
8141*/
8142QString QString::number(qulonglong n, int base)
8143{
8144#if defined(QT_CHECK_RANGE)
8145 if (base < 2 || base > 36) {
8146 qWarning("QString::setNum: Invalid base (%d)", base);
8147 base = 10;
8148 }
8149#endif
8150 return qulltoBasicLatin(l: n, base, negative: false);
8151}
8152
8153
8154/*!
8155 Returns a string representing the floating-point number \a n.
8156
8157 Returns a string that represents \a n, formatted according to the specified
8158 \a format and \a precision.
8159
8160 For formats with an exponent, the exponent will show its sign and have at
8161 least two digits, left-padding the exponent with zero if needed.
8162
8163 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8164*/
8165QString QString::number(double n, char format, int precision)
8166{
8167 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8168
8169 switch (QtMiscUtils::toAsciiLower(ch: format)) {
8170 case 'f':
8171 form = QLocaleData::DFDecimal;
8172 break;
8173 case 'e':
8174 form = QLocaleData::DFExponent;
8175 break;
8176 case 'g':
8177 form = QLocaleData::DFSignificantDigits;
8178 break;
8179 default:
8180#if defined(QT_CHECK_RANGE)
8181 qWarning("QString::setNum: Invalid format char '%c'", format);
8182#endif
8183 break;
8184 }
8185
8186 return qdtoBasicLatin(d: n, form, precision, uppercase: isAsciiUpper(c: format));
8187}
8188
8189namespace {
8190template<class ResultList, class StringSource>
8191static ResultList splitString(const StringSource &source, QStringView sep,
8192 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8193{
8194 ResultList list;
8195 typename StringSource::size_type start = 0;
8196 typename StringSource::size_type end;
8197 typename StringSource::size_type extra = 0;
8198 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8199 if (start != end || behavior == Qt::KeepEmptyParts)
8200 list.append(source.sliced(start, end - start));
8201 start = end + sep.size();
8202 extra = (sep.size() == 0 ? 1 : 0);
8203 }
8204 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8205 list.append(source.sliced(start));
8206 return list;
8207}
8208
8209} // namespace
8210
8211/*!
8212 Splits the string into substrings wherever \a sep occurs, and
8213 returns the list of those strings. If \a sep does not match
8214 anywhere in the string, split() returns a single-element list
8215 containing this string.
8216
8217 \a cs specifies whether \a sep should be matched case
8218 sensitively or case insensitively.
8219
8220 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8221 appear in the result. By default, empty entries are kept.
8222
8223 Example:
8224
8225 \snippet qstring/main.cpp 62
8226
8227 If \a sep is empty, split() returns an empty string, followed
8228 by each of the string's characters, followed by another empty string:
8229
8230 \snippet qstring/main.cpp 62-empty
8231
8232 To understand this behavior, recall that the empty string matches
8233 everywhere, so the above is qualitatively the same as:
8234
8235 \snippet qstring/main.cpp 62-slashes
8236
8237 \sa QStringList::join(), section()
8238
8239 \since 5.14
8240*/
8241QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8242{
8243 return splitString<QStringList>(source: *this, sep, behavior, cs);
8244}
8245
8246/*!
8247 \overload
8248 \since 5.14
8249*/
8250QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8251{
8252 return splitString<QStringList>(source: *this, sep: QStringView(&sep, 1), behavior, cs);
8253}
8254
8255/*!
8256 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8257 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8258
8259
8260 Splits the view into substring views wherever \a sep occurs, and
8261 returns the list of those string views.
8262
8263 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8264 the result.
8265
8266 \note All the returned views are valid as long as the data referenced by
8267 this string view is valid. Destroying the data will cause all views to
8268 become dangling.
8269
8270 \since 6.0
8271*/
8272QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8273{
8274 return splitString<QList<QStringView>>(source: QStringView(*this), sep, behavior, cs);
8275}
8276
8277QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8278{
8279 return split(sep: QStringView(&sep, 1), behavior, cs);
8280}
8281
8282#if QT_CONFIG(regularexpression)
8283namespace {
8284template<class ResultList, typename String, typename MatchingFunction>
8285static ResultList splitString(const String &source, const QRegularExpression &re,
8286 MatchingFunction matchingFunction,
8287 Qt::SplitBehavior behavior)
8288{
8289 ResultList list;
8290 if (!re.isValid()) {
8291 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString::split");
8292 return list;
8293 }
8294
8295 qsizetype start = 0;
8296 qsizetype end = 0;
8297 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8298 while (iterator.hasNext()) {
8299 QRegularExpressionMatch match = iterator.next();
8300 end = match.capturedStart();
8301 if (start != end || behavior == Qt::KeepEmptyParts)
8302 list.append(source.sliced(start, end - start));
8303 start = match.capturedEnd();
8304 }
8305
8306 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8307 list.append(source.sliced(start));
8308
8309 return list;
8310}
8311} // namespace
8312
8313/*!
8314 \overload
8315 \since 5.14
8316
8317 Splits the string into substrings wherever the regular expression
8318 \a re matches, and returns the list of those strings. If \a re
8319 does not match anywhere in the string, split() returns a
8320 single-element list containing this string.
8321
8322 Here is an example where we extract the words in a sentence
8323 using one or more whitespace characters as the separator:
8324
8325 \snippet qstring/main.cpp 90
8326
8327 Here is a similar example, but this time we use any sequence of
8328 non-word characters as the separator:
8329
8330 \snippet qstring/main.cpp 91
8331
8332 Here is a third example where we use a zero-length assertion,
8333 \b{\\b} (word boundary), to split the string into an
8334 alternating sequence of non-word and word tokens:
8335
8336 \snippet qstring/main.cpp 92
8337
8338 \sa QStringList::join(), section()
8339*/
8340QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8341{
8342#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8343 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8344#else
8345 const auto matchingFunction = &QRegularExpression::globalMatch;
8346#endif
8347 return splitString<QStringList>(source: *this,
8348 re,
8349 matchingFunction,
8350 behavior);
8351}
8352
8353/*!
8354 \overload
8355 \since 6.0
8356
    Splits the view into substring views wherever the regular expression \a re
    matches, and returns the list of those views. If \a re does not match
    anywhere in the view, split() returns a single-element list containing
    this view.
8361
8362 \note The views in the returned list are sub-views of this view; as such,
8363 they reference the same data as it and only remain valid for as long as that
8364 data remains live.
8365*/
8366QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8367{
8368 return splitString<QList<QStringView>>(source: *this, re, matchingFunction: &QRegularExpression::globalMatchView, behavior);
8369}
8370
8371#endif // QT_CONFIG(regularexpression)
8372
8373/*!
8374 \enum QString::NormalizationForm
8375
8376 This enum describes the various normalized forms of Unicode text.
8377
8378 \value NormalizationForm_D Canonical Decomposition
8379 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8380 \value NormalizationForm_KD Compatibility Decomposition
8381 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8382
8383 \sa normalized(),
8384 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8385*/
8386
8387/*!
8388 \since 4.5
8389
8390 Returns a copy of this string repeated the specified number of \a times.
8391
8392 If \a times is less than 1, an empty string is returned.
8393
8394 Example:
8395
8396 \snippet code/src_corelib_text_qstring.cpp 8
8397*/
8398QString QString::repeated(qsizetype times) const
8399{
8400 if (d.size == 0)
8401 return *this;
8402
8403 if (times <= 1) {
8404 if (times == 1)
8405 return *this;
8406 return QString();
8407 }
8408
8409 const qsizetype resultSize = times * d.size;
8410
8411 QString result;
8412 result.reserve(asize: resultSize);
8413 if (result.capacity() != resultSize)
8414 return QString(); // not enough memory
8415
8416 memcpy(dest: result.d.data(), src: d.data(), n: d.size * sizeof(QChar));
8417
8418 qsizetype sizeSoFar = d.size;
8419 char16_t *end = result.d.data() + sizeSoFar;
8420
8421 const qsizetype halfResultSize = resultSize >> 1;
8422 while (sizeSoFar <= halfResultSize) {
8423 memcpy(dest: end, src: result.d.data(), n: sizeSoFar * sizeof(QChar));
8424 end += sizeSoFar;
8425 sizeSoFar <<= 1;
8426 }
8427 memcpy(dest: end, src: result.d.data(), n: (resultSize - sizeSoFar) * sizeof(QChar));
8428 result.d.data()[resultSize] = '\0';
8429 result.d.size = resultSize;
8430 return result;
8431}
8432
8433void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8434{
8435 {
8436 // check if it's fully ASCII first, because then we have no work
8437 auto start = reinterpret_cast<const char16_t *>(data->constData());
8438 const char16_t *p = start + from;
8439 if (isAscii_helper(ptr&: p, end: p + data->size() - from))
8440 return;
8441 if (p > start + from)
8442 from = p - start - 1; // need one before the non-ASCII to perform NFC
8443 }
8444
8445 if (version == QChar::Unicode_Unassigned) {
8446 version = QChar::currentUnicodeVersion();
8447 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8448 const QString &s = *data;
8449 QChar *d = nullptr;
8450 for (const NormalizationCorrection &n : uc_normalization_corrections) {
8451 if (n.version > version) {
8452 qsizetype pos = from;
8453 if (QChar::requiresSurrogates(ucs4: n.ucs4)) {
8454 char16_t ucs4High = QChar::highSurrogate(ucs4: n.ucs4);
8455 char16_t ucs4Low = QChar::lowSurrogate(ucs4: n.ucs4);
8456 char16_t oldHigh = QChar::highSurrogate(ucs4: n.old_mapping);
8457 char16_t oldLow = QChar::lowSurrogate(ucs4: n.old_mapping);
8458 while (pos < s.size() - 1) {
8459 if (s.at(i: pos).unicode() == ucs4High && s.at(i: pos + 1).unicode() == ucs4Low) {
8460 if (!d)
8461 d = data->data();
8462 d[pos] = QChar(oldHigh);
8463 d[++pos] = QChar(oldLow);
8464 }
8465 ++pos;
8466 }
8467 } else {
8468 while (pos < s.size()) {
8469 if (s.at(i: pos).unicode() == n.ucs4) {
8470 if (!d)
8471 d = data->data();
8472 d[pos] = QChar(n.old_mapping);
8473 }
8474 ++pos;
8475 }
8476 }
8477 }
8478 }
8479 }
8480
8481 if (normalizationQuickCheckHelper(str: data, mode, from, lastStable: &from))
8482 return;
8483
8484 decomposeHelper(str: data, canonical: mode < QString::NormalizationForm_KD, version, from);
8485
8486 canonicalOrderHelper(str: data, version, from);
8487
8488 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8489 return;
8490
8491 composeHelper(str: data, version, from);
8492}
8493
8494/*!
8495 Returns the string in the given Unicode normalization \a mode,
8496 according to the given \a version of the Unicode standard.
8497*/
8498QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8499{
8500 QString copy = *this;
8501 qt_string_normalize(data: &copy, mode, version, from: 0);
8502 return copy;
8503}
8504
8505#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8506static void checkArgEscape(QStringView s)
8507{
8508 // If we're in here, it means that qArgDigitValue has accepted the
8509 // digit. We can skip the check in case we already know it will
8510 // succeed.
8511 if (!supportUnicodeDigitValuesInArg())
8512 return;
8513
8514 const auto isNonAsciiDigit = [](QChar c) {
8515 return c.unicode() < u'0' || c.unicode() > u'9';
8516 };
8517
8518 if (std::any_of(first: s.begin(), last: s.end(), pred: isNonAsciiDigit)) {
8519 const auto accumulateDigit = [](int partial, QChar digit) {
8520 return partial * 10 + digit.digitValue();
8521 };
8522 const int parsedNumber = std::accumulate(first: s.begin(), last: s.end(), init: 0, binary_op: accumulateDigit);
8523
8524 qWarning(msg: "QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8525 " it is currently being interpreted as the %d-th substitution.\n"
8526 " This is deprecated; support for non-ASCII digits will be dropped\n"
8527 " in a future version of Qt.",
8528 qUtf16Printable(s.toString()),
8529 parsedNumber);
8530 }
8531}
8532#endif
8533
8534struct ArgEscapeData
8535{
8536 int min_escape; // lowest escape sequence number
8537 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8538 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8539 // contain 'L'
8540 qsizetype escape_len; // total length of escape sequences which will be replaced
8541};
8542
8543static ArgEscapeData findArgEscapes(QStringView s)
8544{
8545 const QChar *uc_begin = s.begin();
8546 const QChar *uc_end = s.end();
8547
8548 ArgEscapeData d;
8549
8550 d.min_escape = INT_MAX;
8551 d.occurrences = 0;
8552 d.escape_len = 0;
8553 d.locale_occurrences = 0;
8554
8555 const QChar *c = uc_begin;
8556 while (c != uc_end) {
8557 while (c != uc_end && c->unicode() != '%')
8558 ++c;
8559
8560 if (c == uc_end)
8561 break;
8562 const QChar *escape_start = c;
8563 if (++c == uc_end)
8564 break;
8565
8566 bool locale_arg = false;
8567 if (c->unicode() == 'L') {
8568 locale_arg = true;
8569 if (++c == uc_end)
8570 break;
8571 }
8572
8573 int escape = qArgDigitValue(ch: *c);
8574 if (escape == -1)
8575 continue;
8576
8577 // ### Qt 7: do not allow anything but ASCII digits
8578 // in arg()'s replacements.
8579#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8580 const QChar *escapeBegin = c;
8581 const QChar *escapeEnd = escapeBegin + 1;
8582#endif
8583
8584 ++c;
8585
8586 if (c != uc_end) {
8587 const int next_escape = qArgDigitValue(ch: *c);
8588 if (next_escape != -1) {
8589 escape = (10 * escape) + next_escape;
8590 ++c;
8591#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8592 ++escapeEnd;
8593#endif
8594 }
8595 }
8596
8597#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8598 checkArgEscape(s: QStringView(escapeBegin, escapeEnd));
8599#endif
8600
8601 if (escape > d.min_escape)
8602 continue;
8603
8604 if (escape < d.min_escape) {
8605 d.min_escape = escape;
8606 d.occurrences = 0;
8607 d.escape_len = 0;
8608 d.locale_occurrences = 0;
8609 }
8610
8611 ++d.occurrences;
8612 if (locale_arg)
8613 ++d.locale_occurrences;
8614 d.escape_len += c - escape_start;
8615 }
8616 return d;
8617}
8618
8619static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
8620 QStringView arg, QStringView larg, QChar fillChar)
8621{
8622 // Negative field-width for right-padding, positive for left-padding:
8623 const qsizetype abs_field_width = qAbs(t: field_width);
8624 const qsizetype result_len =
8625 s.size() - d.escape_len
8626 + (d.occurrences - d.locale_occurrences) * qMax(a: abs_field_width, b: arg.size())
8627 + d.locale_occurrences * qMax(a: abs_field_width, b: larg.size());
8628
8629 QString result(result_len, Qt::Uninitialized);
8630 QChar *rc = const_cast<QChar *>(result.unicode());
8631 QChar *const result_end = rc + result_len;
8632 qsizetype repl_cnt = 0;
8633
8634 const QChar *c = s.begin();
8635 const QChar *const uc_end = s.end();
8636 while (c != uc_end) {
8637 Q_ASSERT(d.occurrences > repl_cnt);
8638 /* We don't have to check increments of c against uc_end because, as
8639 long as d.occurrences > repl_cnt, we KNOW there are valid escape
8640 sequences remaining. */
8641
8642 const QChar *text_start = c;
8643 while (c->unicode() != '%')
8644 ++c;
8645
8646 const QChar *escape_start = c++;
8647 const bool localize = c->unicode() == 'L';
8648 if (localize)
8649 ++c;
8650
8651 int escape = qArgDigitValue(ch: *c);
8652 if (escape != -1 && c + 1 != uc_end) {
8653 const int digit = qArgDigitValue(ch: c[1]);
8654 if (digit != -1) {
8655 ++c;
8656 escape = 10 * escape + digit;
8657 }
8658 }
8659
8660 if (escape != d.min_escape) {
8661 memcpy(dest: rc, src: text_start, n: (c - text_start) * sizeof(QChar));
8662 rc += c - text_start;
8663 } else {
8664 ++c;
8665
8666 memcpy(dest: rc, src: text_start, n: (escape_start - text_start) * sizeof(QChar));
8667 rc += escape_start - text_start;
8668
8669 const QStringView use = localize ? larg : arg;
8670 const qsizetype pad_chars = abs_field_width - use.size();
8671 // (If negative, relevant loops are no-ops: no need to check.)
8672
8673 if (field_width > 0) { // left padded
8674 rc = std::fill_n(first: rc, n: pad_chars, value: fillChar);
8675 }
8676
8677 if (use.size())
8678 memcpy(dest: rc, src: use.data(), n: use.size() * sizeof(QChar));
8679 rc += use.size();
8680
8681 if (field_width < 0) { // right padded
8682 rc = std::fill_n(first: rc, n: pad_chars, value: fillChar);
8683 }
8684
8685 if (++repl_cnt == d.occurrences) {
8686 memcpy(dest: rc, src: c, n: (uc_end - c) * sizeof(QChar));
8687 rc += uc_end - c;
8688 Q_ASSERT(rc == result_end);
8689 c = uc_end;
8690 }
8691 }
8692 }
8693 Q_ASSERT(rc == result_end);
8694
8695 return result;
8696}
8697
8698/*!
8699 Returns a copy of this string with the lowest numbered place marker
8700 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8701
8702 \a fieldWidth specifies the minimum amount of space that argument \a
8703 a shall occupy. If \a a requires less space than \a fieldWidth, it
8704 is padded to \a fieldWidth with character \a fillChar. A positive
8705 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8706 produces left-aligned text.
8707
8708 This example shows how we might create a \c status string for
8709 reporting progress while processing a list of files:
8710
8711 \snippet qstring/main.cpp 11
8712
8713 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8714 %2. Finally, \c arg(fileName) replaces \c %3.
8715
8716 One advantage of using arg() over asprintf() is that the order of the
8717 numbered place markers can change, if the application's strings are
8718 translated into other languages, but each arg() will still replace
8719 the lowest numbered unreplaced place marker, no matter where it
8720 appears. Also, if place marker \c %i appears more than once in the
    string, arg() replaces all of them.
8722
8723 If there is no unreplaced place marker remaining, a warning message
8724 is output and the result is undefined. Place marker numbers must be
8725 in the range 1 to 99.
8726*/
8727QString QString::arg(const QString &a, int fieldWidth, QChar fillChar) const
8728{
8729 return arg(a: qToStringViewIgnoringNull(s: a), fieldWidth, fillChar);
8730}
8731
8732/*!
8733 \overload
8734 \since 5.10
8735
8736 Returns a copy of this string with the lowest-numbered place-marker
8737 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8738
8739 \a fieldWidth specifies the minimum amount of space that \a a
8740 shall occupy. If \a a requires less space than \a fieldWidth, it
8741 is padded to \a fieldWidth with character \a fillChar. A positive
8742 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8743 produces left-aligned text.
8744
8745 This example shows how we might create a \c status string for
8746 reporting progress while processing a list of files:
8747
8748 \snippet qstring/main.cpp 11-qstringview
8749
8750 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8751 %2. Finally, \c arg(fileName) replaces \c %3.
8752
8753 One advantage of using arg() over asprintf() is that the order of the
8754 numbered place markers can change, if the application's strings are
8755 translated into other languages, but each arg() will still replace
8756 the lowest-numbered unreplaced place-marker, no matter where it
8757 appears. Also, if place-marker \c %i appears more than once in the
8758 string, arg() replaces all of them.
8759
8760 If there is no unreplaced place-marker remaining, a warning message
8761 is printed and the result is undefined. Place-marker numbers must be
8762 in the range 1 to 99.
8763*/
8764QString QString::arg(QStringView a, int fieldWidth, QChar fillChar) const
8765{
8766 ArgEscapeData d = findArgEscapes(s: *this);
8767
8768 if (Q_UNLIKELY(d.occurrences == 0)) {
8769 qWarning(msg: "QString::arg: Argument missing: %ls, %ls", qUtf16Printable(*this),
8770 qUtf16Printable(a.toString()));
8771 return *this;
8772 }
8773 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg: a, larg: a, fillChar);
8774}
8775
8776/*!
8777 \overload
8778 \since 5.10
8779
8780 Returns a copy of this string with the lowest-numbered place-marker
8781 replaced by the Latin-1 string viewed by \a a, i.e., \c %1, \c %2, ..., \c %99.
8782
8783 \a fieldWidth specifies the minimum amount of space that \a a
8784 shall occupy. If \a a requires less space than \a fieldWidth, it
8785 is padded to \a fieldWidth with character \a fillChar. A positive
8786 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8787 produces left-aligned text.
8788
8789 One advantage of using arg() over asprintf() is that the order of the
8790 numbered place markers can change, if the application's strings are
8791 translated into other languages, but each arg() will still replace
8792 the lowest-numbered unreplaced place-marker, no matter where it
8793 appears. Also, if place-marker \c %i appears more than once in the
8794 string, arg() replaces all of them.
8795
8796 If there is no unreplaced place-marker remaining, a warning message
8797 is printed and the result is undefined. Place-marker numbers must be
8798 in the range 1 to 99.
8799*/
8800QString QString::arg(QLatin1StringView a, int fieldWidth, QChar fillChar) const
8801{
8802 QVarLengthArray<char16_t> utf16 = qt_from_latin1_to_qvla(str: a);
8803 return arg(a: QStringView(utf16.data(), utf16.size()), fieldWidth, fillChar);
8804}
8805
8806/*! \fn QString QString::arg(int a, int fieldWidth, int base, QChar fillChar) const
8807 \overload arg()
8808
8809 The \a a argument is expressed in base \a base, which is 10 by
8810 default and must be between 2 and 36. For bases other than 10, \a a
8811 is treated as an unsigned integer.
8812
8813 \a fieldWidth specifies the minimum amount of space that \a a is
8814 padded to and filled with the character \a fillChar. A positive
8815 value produces right-aligned text; a negative value produces
8816 left-aligned text.
8817
8818 The '%' can be followed by an 'L', in which case the sequence is
8819 replaced with a localized representation of \a a. The conversion
8820 uses the default locale, set by QLocale::setDefault(). If no default
8821 locale was specified, the system locale is used. The 'L' flag is
8822 ignored if \a base is not 10.
8823
8824 \snippet qstring/main.cpp 12
8825 \snippet qstring/main.cpp 14
8826
8827 \sa {Number Formats}
8828*/
8829
8830/*! \fn QString QString::arg(uint a, int fieldWidth, int base, QChar fillChar) const
8831 \overload arg()
8832
8833 The \a base argument specifies the base to use when converting the
8834 integer \a a into a string. The base must be between 2 and 36.
8835
8836 \sa {Number Formats}
8837*/
8838
8839/*! \fn QString QString::arg(long a, int fieldWidth, int base, QChar fillChar) const
8840 \overload arg()
8841
8842 \a fieldWidth specifies the minimum amount of space that \a a is
8843 padded to and filled with the character \a fillChar. A positive
8844 value produces right-aligned text; a negative value produces
8845 left-aligned text.
8846
8847 The \a a argument is expressed in the given \a base, which is 10 by
8848 default and must be between 2 and 36.
8849
8850 The '%' can be followed by an 'L', in which case the sequence is
8851 replaced with a localized representation of \a a. The conversion
8852 uses the default locale. The default locale is determined from the
8853 system's locale settings at application startup. It can be changed
8854 using QLocale::setDefault(). The 'L' flag is ignored if \a base is
8855 not 10.
8856
8857 \snippet qstring/main.cpp 12
8858 \snippet qstring/main.cpp 14
8859
8860 \sa {Number Formats}
8861*/
8862
8863/*!
8864 \fn QString QString::arg(ulong a, int fieldWidth, int base, QChar fillChar) const
8865 \overload arg()
8866
8867 \a fieldWidth specifies the minimum amount of space that \a a is
8868 padded to and filled with the character \a fillChar. A positive
8869 value produces right-aligned text; a negative value produces
8870 left-aligned text.
8871
8872 The \a base argument specifies the base to use when converting the
8873 integer \a a to a string. The base must be between 2 and 36, with 8
8874 giving octal, 10 decimal, and 16 hexadecimal numbers.
8875
8876 \sa {Number Formats}
8877*/
8878
8879/*!
8880 \overload arg()
8881
8882 \a fieldWidth specifies the minimum amount of space that \a a is
8883 padded to and filled with the character \a fillChar. A positive
8884 value produces right-aligned text; a negative value produces
8885 left-aligned text.
8886
8887 The \a base argument specifies the base to use when converting the
8888 integer \a a into a string. The base must be between 2 and 36, with
8889 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8890
8891 \sa {Number Formats}
8892*/
8893QString QString::arg(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8894{
8895 ArgEscapeData d = findArgEscapes(s: *this);
8896
8897 if (d.occurrences == 0) {
8898 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
8899 return *this;
8900 }
8901
8902 unsigned flags = QLocaleData::NoFlags;
8903 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8904 if (fillChar == u'0')
8905 flags = QLocaleData::ZeroPadded;
8906
8907 QString arg;
8908 if (d.occurrences > d.locale_occurrences) {
8909 arg = QLocaleData::c()->longLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8910 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8911 }
8912
8913 QString localeArg;
8914 if (d.locale_occurrences > 0) {
8915 QLocale locale;
8916 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8917 flags |= QLocaleData::GroupDigits;
8918 localeArg = locale.d->m_data->longLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8919 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8920 }
8921
8922 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
8923}
8924
8925/*!
8926 \overload arg()
8927
8928 \a fieldWidth specifies the minimum amount of space that \a a is
8929 padded to and filled with the character \a fillChar. A positive
8930 value produces right-aligned text; a negative value produces
8931 left-aligned text.
8932
8933 The \a base argument specifies the base to use when converting the
8934 integer \a a into a string. \a base must be between 2 and 36, with 8
8935 giving octal, 10 decimal, and 16 hexadecimal numbers.
8936
8937 \sa {Number Formats}
8938*/
8939QString QString::arg(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8940{
8941 ArgEscapeData d = findArgEscapes(s: *this);
8942
8943 if (d.occurrences == 0) {
8944 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
8945 return *this;
8946 }
8947
8948 unsigned flags = QLocaleData::NoFlags;
8949 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8950 if (fillChar == u'0')
8951 flags = QLocaleData::ZeroPadded;
8952
8953 QString arg;
8954 if (d.occurrences > d.locale_occurrences) {
8955 arg = QLocaleData::c()->unsLongLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8956 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8957 }
8958
8959 QString localeArg;
8960 if (d.locale_occurrences > 0) {
8961 QLocale locale;
8962 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8963 flags |= QLocaleData::GroupDigits;
8964 localeArg = locale.d->m_data->unsLongLongToString(l: a, precision: -1, base, width: fieldWidth, flags);
8965 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8966 }
8967
8968 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
8969}
8970
8971/*!
8972 \overload arg()
8973
8974 \fn QString QString::arg(short a, int fieldWidth, int base, QChar fillChar) const
8975
8976 \a fieldWidth specifies the minimum amount of space that \a a is
8977 padded to and filled with the character \a fillChar. A positive
8978 value produces right-aligned text; a negative value produces
8979 left-aligned text.
8980
8981 The \a base argument specifies the base to use when converting the
8982 integer \a a into a string. The base must be between 2 and 36, with
8983 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8984
8985 \sa {Number Formats}
8986*/
8987
8988/*!
8989 \fn QString QString::arg(ushort a, int fieldWidth, int base, QChar fillChar) const
8990 \overload arg()
8991
8992 \a fieldWidth specifies the minimum amount of space that \a a is
8993 padded to and filled with the character \a fillChar. A positive
8994 value produces right-aligned text; a negative value produces
8995 left-aligned text.
8996
8997 The \a base argument specifies the base to use when converting the
8998 integer \a a into a string. The base must be between 2 and 36, with
8999 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
9000
9001 \sa {Number Formats}
9002*/
9003
9004/*!
9005 \overload arg()
9006*/
9007QString QString::arg(QChar a, int fieldWidth, QChar fillChar) const
9008{
9009 return arg(a: QStringView{&a, 1}, fieldWidth, fillChar);
9010}
9011
9012/*!
9013 \overload arg()
9014
9015 The \a a argument is interpreted as a Latin-1 character.
9016*/
9017QString QString::arg(char a, int fieldWidth, QChar fillChar) const
9018{
9019 return arg(a: QLatin1Char(a), fieldWidth, fillChar);
9020}
9021
9022/*!
9023 \overload arg()
9024
9025 Argument \a a is formatted according to the specified \a format and
9026 \a precision. See \l{Floating-point Formats} for details.
9027
9028 \a fieldWidth specifies the minimum amount of space that \a a is
9029 padded to and filled with the character \a fillChar. A positive
9030 value produces right-aligned text; a negative value produces
9031 left-aligned text.
9032
9033 \snippet code/src_corelib_text_qstring.cpp 2
9034
9035 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
9036*/
9037QString QString::arg(double a, int fieldWidth, char format, int precision, QChar fillChar) const
9038{
9039 ArgEscapeData d = findArgEscapes(s: *this);
9040
9041 if (d.occurrences == 0) {
9042 qWarning(msg: "QString::arg: Argument missing: %s, %g", toLocal8Bit().data(), a);
9043 return *this;
9044 }
9045
9046 unsigned flags = QLocaleData::NoFlags;
9047 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
9048 if (fillChar == u'0')
9049 flags |= QLocaleData::ZeroPadded;
9050
9051 if (isAsciiUpper(c: format))
9052 flags |= QLocaleData::CapitalEorX;
9053
9054 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
9055 switch (QtMiscUtils::toAsciiLower(ch: format)) {
9056 case 'f':
9057 form = QLocaleData::DFDecimal;
9058 break;
9059 case 'e':
9060 form = QLocaleData::DFExponent;
9061 break;
9062 case 'g':
9063 form = QLocaleData::DFSignificantDigits;
9064 break;
9065 default:
9066#if defined(QT_CHECK_RANGE)
9067 qWarning("QString::arg: Invalid format char '%c'", format);
9068#endif
9069 break;
9070 }
9071
9072 QString arg;
9073 if (d.occurrences > d.locale_occurrences) {
9074 arg = QLocaleData::c()->doubleToString(d: a, precision, form, width: fieldWidth,
9075 flags: flags | QLocaleData::ZeroPadExponent);
9076 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9077 || fieldWidth <= arg.size());
9078 }
9079
9080 QString localeArg;
9081 if (d.locale_occurrences > 0) {
9082 QLocale locale;
9083
9084 const QLocale::NumberOptions numberOptions = locale.numberOptions();
9085 if (!(numberOptions & QLocale::OmitGroupSeparator))
9086 flags |= QLocaleData::GroupDigits;
9087 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
9088 flags |= QLocaleData::ZeroPadExponent;
9089 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
9090 flags |= QLocaleData::AddTrailingZeroes;
9091 localeArg = locale.d->m_data->doubleToString(d: a, precision, form, width: fieldWidth, flags);
9092 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9093 || fieldWidth <= localeArg.size());
9094 }
9095
9096 return replaceArgEscapes(s: *this, d, field_width: fieldWidth, arg, larg: localeArg, fillChar);
9097}
9098
9099static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9100static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9101
9102template <typename Char>
9103static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9104{
9105 qsizetype i = *pos;
9106 ++i;
9107 if (i < len && uc[i] == u'L')
9108 ++i;
9109 if (i < len) {
9110 int escape = to_unicode(uc[i]) - '0';
9111 if (uint(escape) >= 10U)
9112 return -1;
9113 ++i;
9114 if (i < len) {
9115 // there's a second digit
9116 int digit = to_unicode(uc[i]) - '0';
9117 if (uint(digit) < 10U) {
9118 escape = (escape * 10) + digit;
9119 ++i;
9120 }
9121 }
9122 *pos = i;
9123 return escape;
9124 }
9125 return -1;
9126}
9127
9128/*
9129 Algorithm for multiArg:
9130
    1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9132 The L is parsed and accepted for compatibility with non-multi-arg, but since
9133 multiArg only accepts strings as replacements, the localization request can
9134 be safely ignored.
9135 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9136 either points at text to be copied verbatim (in which case the int is -1),
9137 or, initially, at the textual representation of the placeholder. In that case,
9138 the int contains the numerical number as parsed from the placeholder.
9139 3. Next, collect all the non-negative ints found, sort them in ascending order and
9140 remove duplicates.
9141 3a. If the result has more entries than multiArg() was given replacement strings,
9142 we have found placeholders we can't satisfy with replacement strings. That is
9143 fine (there could be another .arg() call coming after this one), so just
9144 truncate the result to the number of actual multiArg() replacement strings.
    3b. If the result has fewer entries than multiArg() was given replacement strings,
9146 the string is missing placeholders. This is an error that the user should be
9147 warned about.
    4. The result of step (3) is a mapping from the index of any replacement string to
       placeholder number. This is the wrong way around, but since placeholder
       numbers could get as large as 99, while we typically don't have more than 9
       replacement strings, we trade a few hundred bytes of sparsely-used memory for
       doing a reverse lookup each time we need to map a placeholder number to a
       replacement string index (that's a linear search; but still *much* faster than
       using an associative container).
9154 5. Next, for each of the tuples found in step (1), do the following:
9155 5a. If the int is negative, do nothing.
9156 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9157 the string-ref with a string-ref for the (complete) I'th replacement string.
9158 5c. Otherwise, do nothing.
9159 6. Concatenate all string refs into a single result string.
9160*/
9161
9162namespace {
9163struct Part
9164{
9165 Part() = default; // for QVarLengthArray; do not use
9166 constexpr Part(QAnyStringView s, int num = -1)
9167 : string{s}, number{num} {}
9168
9169 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9170
9171 QAnyStringView string;
9172 int number;
9173};
9174} // unnamed namespace
9175
9176Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE);
9177
9178namespace {
9179
9180enum { ExpectedParts = 32 };
9181
9182typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9183typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9184
9185template <typename StringView>
9186static ParseResult parseMultiArgFormatString(StringView s)
9187{
9188 ParseResult result;
9189
9190 const auto uc = s.data();
9191 const auto len = s.size();
9192 const auto end = len - 1;
9193 qsizetype i = 0;
9194 qsizetype last = 0;
9195
9196 while (i < end) {
9197 if (uc[i] == u'%') {
9198 qsizetype percent = i;
9199 int number = getEscape(uc, &i, len);
9200 if (number != -1) {
9201 if (last != percent)
9202 result.push_back(t: Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9203 result.push_back(t: Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9204 last = i;
9205 continue;
9206 }
9207 }
9208 ++i;
9209 }
9210
9211 if (last < len)
9212 result.push_back(t: Part{s.sliced(last, len - last)}); // trailing literal text
9213
9214 return result;
9215}
9216
9217static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9218{
9219 ArgIndexToPlaceholderMap result;
9220
9221 for (const Part &part : parts) {
9222 if (part.number >= 0)
9223 result.push_back(t: part.number);
9224 }
9225
9226 std::sort(first: result.begin(), last: result.end());
9227 result.erase(abegin: std::unique(first: result.begin(), last: result.end()),
9228 aend: result.end());
9229
9230 return result;
9231}
9232
9233static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9234{
9235 using namespace QtPrivate;
9236 qsizetype totalSize = 0;
9237 for (Part &part : parts) {
9238 if (part.number != -1) {
9239 const auto it = std::find(first: argIndexToPlaceholderMap.begin(), last: argIndexToPlaceholderMap.end(), val: part.number);
9240 if (it != argIndexToPlaceholderMap.end()) {
9241 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9242 switch (arg.tag) {
9243 case ArgBase::L1:
9244 part.reset(s: static_cast<const QLatin1StringArg&>(arg).string);
9245 break;
9246 case ArgBase::U8:
9247 Q_UNREACHABLE(); // waiting for QUtf8String...
9248 break;
9249 case ArgBase::U16:
9250 part.reset(s: static_cast<const QStringViewArg&>(arg).string);
9251 break;
9252 }
9253 }
9254 }
9255 totalSize += part.string.size();
9256 }
9257 return totalSize;
9258}
9259
9260} // unnamed namespace
9261
9262template <typename StringView>
9263static QString argToQStringImpl(StringView pattern, size_t numArgs, const QtPrivate::ArgBase **args)
9264{
9265 // Step 1-2 above
9266 ParseResult parts = parseMultiArgFormatString(pattern);
9267
9268 // 3-4
9269 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9270
9271 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9272 argIndexToPlaceholderMap.resize(sz: qsizetype(numArgs));
9273 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9274 qWarning(msg: "QString::arg: %d argument(s) missing in %ls",
9275 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9276
9277 // 5
9278 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9279
9280 // 6:
9281 QString result(totalSize, Qt::Uninitialized);
9282 auto out = const_cast<QChar*>(result.constData());
9283
9284 struct Concatenate {
9285 QChar *out;
9286 QChar *operator()(QLatin1String part) noexcept
9287 {
9288 if (part.size()) {
9289 qt_from_latin1(dst: reinterpret_cast<char16_t*>(out),
9290 str: part.data(), size: part.size());
9291 }
9292 return out + part.size();
9293 }
9294 QChar *operator()(QUtf8StringView part) noexcept
9295 {
9296 return QUtf8::convertToUnicode(buffer: out, in: part);
9297 }
9298 QChar *operator()(QStringView part) noexcept
9299 {
9300 if (part.size())
9301 memcpy(dest: out, src: part.data(), n: part.size() * sizeof(QChar));
9302 return out + part.size();
9303 }
9304 };
9305
9306 for (const Part &part : parts)
9307 out = part.string.visit(Concatenate{out});
9308
9309 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9310 result.truncate(pos: out - result.cbegin());
9311
9312 return result;
9313}
9314
9315QString QtPrivate::argToQString(QStringView pattern, size_t n, const ArgBase **args)
9316{
9317 return argToQStringImpl(pattern, numArgs: n, args);
9318}
9319
9320QString QtPrivate::argToQString(QLatin1StringView pattern, size_t n, const ArgBase **args)
9321{
9322 return argToQStringImpl(pattern, numArgs: n, args);
9323}
9324
9325/*! \fn bool QString::isRightToLeft() const
9326
9327 Returns \c true if the string is read right to left.
9328
9329 \sa QStringView::isRightToLeft()
9330*/
9331bool QString::isRightToLeft() const
9332{
9333 return QtPrivate::isRightToLeft(string: QStringView(*this));
9334}
9335
9336/*!
9337 \fn bool QString::isValidUtf16() const noexcept
9338 \since 5.15
9339
9340 Returns \c true if the string contains valid UTF-16 encoded data,
9341 or \c false otherwise.
9342
9343 Note that this function does not perform any special validation of the
9344 data; it merely checks if it can be successfully decoded from UTF-16.
9345 The data is assumed to be in host byte order; the presence of a BOM
9346 is meaningless.
9347
9348 \sa QStringView::isValidUtf16()
9349*/
9350
9351/*! \fn QChar *QString::data()
9352
9353 Returns a pointer to the data stored in the QString. The pointer
9354 can be used to access and modify the characters that compose the
9355 string.
9356
9357 Unlike constData() and unicode(), the returned data is always
9358 '\\0'-terminated.
9359
9360 Example:
9361
9362 \snippet qstring/main.cpp 19
9363
9364 Note that the pointer remains valid only as long as the string is
9365 not modified by other means. For read-only access, constData() is
9366 faster because it never causes a \l{deep copy} to occur.
9367
9368 \sa constData(), operator[]()
9369*/
9370
9371/*! \fn const QChar *QString::data() const
9372
9373 \overload
9374
9375 \note The returned string may not be '\\0'-terminated.
9376 Use size() to determine the length of the array.
9377
9378 \sa fromRawData()
9379*/
9380
9381/*! \fn const QChar *QString::constData() const
9382
9383 Returns a pointer to the data stored in the QString. The pointer
9384 can be used to access the characters that compose the string.
9385
9386 Note that the pointer remains valid only as long as the string is
9387 not modified.
9388
9389 \note The returned string may not be '\\0'-terminated.
9390 Use size() to determine the length of the array.
9391
9392 \sa data(), operator[](), fromRawData()
9393*/
9394
9395/*! \fn void QString::push_front(const QString &other)
9396
9397 This function is provided for STL compatibility, prepending the
9398 given \a other string to the beginning of this string. It is
9399 equivalent to \c prepend(other).
9400
9401 \sa prepend()
9402*/
9403
9404/*! \fn void QString::push_front(QChar ch)
9405
9406 \overload
9407
9408 Prepends the given \a ch character to the beginning of this string.
9409*/
9410
9411/*! \fn void QString::push_back(const QString &other)
9412
9413 This function is provided for STL compatibility, appending the
9414 given \a other string onto the end of this string. It is
9415 equivalent to \c append(other).
9416
9417 \sa append()
9418*/
9419
9420/*! \fn void QString::push_back(QChar ch)
9421
9422 \overload
9423
9424 Appends the given \a ch character onto the end of this string.
9425*/
9426
9427/*!
9428 \since 6.1
9429
9430 Removes from the string the characters in the half-open range
9431 [ \a first , \a last ). Returns an iterator to the character
9432 immediately after the last erased character (i.e. the character
9433 referred to by \a last before the erase).
9434*/
9435QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9436{
9437 const auto start = std::distance(first: cbegin(), last: first);
9438 const auto len = std::distance(first: first, last: last);
9439 remove(pos: start, len);
9440 return begin() + start;
9441}
9442
9443/*!
9444 \fn QString::iterator QString::erase(QString::const_iterator it)
9445
9446 \overload
9447 \since 6.5
9448
    Removes the character denoted by \a it from the string.
9450 Returns an iterator to the character immediately after the
9451 erased character.
9452
9453 \code
9454 QString c = "abcdefg";
9455 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9456 \endcode
9457*/
9458
9459/*! \fn void QString::shrink_to_fit()
9460 \since 5.10
9461
9462 This function is provided for STL compatibility. It is
9463 equivalent to squeeze().
9464
9465 \sa squeeze()
9466*/
9467
9468/*!
9469 \fn std::string QString::toStdString() const
9470
9471 Returns a std::string object with the data contained in this
9472 QString. The Unicode data is converted into 8-bit characters using
9473 the toUtf8() function.
9474
9475 This method is mostly useful to pass a QString to a function
9476 that accepts a std::string object.
9477
9478 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9479*/
9480
9481/*!
9482 Constructs a QString that uses the first \a size Unicode characters
9483 in the array \a unicode. The data in \a unicode is \e not
9484 copied. The caller must be able to guarantee that \a unicode will
9485 not be deleted or modified as long as the QString (or an
9486 unmodified copy of it) exists.
9487
9488 Any attempts to modify the QString or copies of it will cause it
9489 to create a deep copy of the data, ensuring that the raw data
9490 isn't modified.
9491
9492 Here is an example of how we can use a QRegularExpression on raw data in
9493 memory without requiring to copy the data into a QString:
9494
9495 \snippet qstring/main.cpp 22
9496 \snippet qstring/main.cpp 23
9497
9498 \warning A string created with fromRawData() is \e not
9499 '\\0'-terminated, unless the raw data contains a '\\0' character
9500 at position \a size. This means unicode() will \e not return a
9501 '\\0'-terminated string (although utf16() does, at the cost of
9502 copying the raw data).
9503
9504 \sa fromUtf16(), setRawData()
9505*/
9506QString QString::fromRawData(const QChar *unicode, qsizetype size)
9507{
9508 return QString(DataPointer::fromRawData(rawData: const_cast<char16_t *>(reinterpret_cast<const char16_t *>(unicode)), length: size));
9509}
9510
9511/*!
9512 \since 4.7
9513
9514 Resets the QString to use the first \a size Unicode characters
9515 in the array \a unicode. The data in \a unicode is \e not
9516 copied. The caller must be able to guarantee that \a unicode will
9517 not be deleted or modified as long as the QString (or an
9518 unmodified copy of it) exists.
9519
9520 This function can be used instead of fromRawData() to re-use
    existing QString objects to save memory re-allocations.
9522
9523 \sa fromRawData()
9524*/
9525QString &QString::setRawData(const QChar *unicode, qsizetype size)
9526{
9527 if (!unicode || !size) {
9528 clear();
9529 }
9530 *this = fromRawData(unicode, size);
9531 return *this;
9532}
9533
9534/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9535 \since 5.5
9536
9537 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9538
9539 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9540*/
9541
9542/*!
9543 \fn std::u16string QString::toStdU16String() const
9544 \since 5.5
9545
9546 Returns a std::u16string object with the data contained in this
9547 QString. The Unicode data is the same as returned by the utf16()
9548 method.
9549
9550 \sa utf16(), toStdWString(), toStdU32String()
9551*/
9552
9553/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9554 \since 5.5
9555
9556 \include qstring.cpp {from-std-string} {UCS-4} {fromUcs4()}
9557
9558 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9559*/
9560
9561/*!
9562 \fn std::u32string QString::toStdU32String() const
9563 \since 5.5
9564
9565 Returns a std::u32string object with the data contained in this
9566 QString. The Unicode data is the same as returned by the toUcs4()
9567 method.
9568
9569 \sa toUcs4(), toStdWString(), toStdU16String()
9570*/
9571
9572#if !defined(QT_NO_DATASTREAM)
9573/*!
9574 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9575 \relates QString
9576
9577 Writes the given \a string to the specified \a stream.
9578
9579 \sa {Serializing Qt Data Types}
9580*/
9581
9582QDataStream &operator<<(QDataStream &out, const QString &str)
9583{
9584 if (out.version() == 1) {
9585 out << str.toLatin1();
9586 } else {
9587 if (!str.isNull() || out.version() < 3) {
9588 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9589 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9590 len: static_cast<qsizetype>(sizeof(QChar) * str.size()));
9591 } else {
9592 QVarLengthArray<char16_t> buffer(str.size());
9593 qbswap<sizeof(char16_t)>(source: str.constData(), count: str.size(), dest: buffer.data());
9594 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9595 len: static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9596 }
9597 } else {
9598 QDataStream::writeQSizeType(s&: out, value: -1); // write null marker
9599 }
9600 }
9601 return out;
9602}
9603
9604/*!
9605 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9606 \relates QString
9607
9608 Reads a string from the specified \a stream into the given \a string.
9609
9610 \sa {Serializing Qt Data Types}
9611*/
9612
9613QDataStream &operator>>(QDataStream &in, QString &str)
9614{
9615 if (in.version() == 1) {
9616 QByteArray l;
9617 in >> l;
9618 str = QString::fromLatin1(ba: l);
9619 } else {
9620 qint64 size = QDataStream::readQSizeType(s&: in);
9621 qsizetype bytes = size;
9622 if (size != bytes || size < -1) {
9623 str.clear();
9624 in.setStatus(QDataStream::SizeLimitExceeded);
9625 return in;
9626 }
9627 if (bytes == -1) { // null string
9628 str = QString();
9629 } else if (bytes > 0) {
9630 if (bytes & 0x1) {
9631 str.clear();
9632 in.setStatus(QDataStream::ReadCorruptData);
9633 return in;
9634 }
9635
9636 const qsizetype Step = 1024 * 1024;
9637 qsizetype len = bytes / 2;
9638 qsizetype allocated = 0;
9639
9640 while (allocated < len) {
9641 int blockSize = qMin(a: Step, b: len - allocated);
9642 str.resize(size: allocated + blockSize);
9643 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9644 len: blockSize * 2) != blockSize * 2) {
9645 str.clear();
9646 in.setStatus(QDataStream::ReadPastEnd);
9647 return in;
9648 }
9649 allocated += blockSize;
9650 }
9651
9652 if ((in.byteOrder() == QDataStream::BigEndian)
9653 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9654 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9655 qbswap<sizeof(*data)>(source: data, count: len, dest: data);
9656 }
9657 } else {
9658 str = QString(QLatin1StringView(""));
9659 }
9660 }
9661 return in;
9662}
9663#endif // QT_NO_DATASTREAM
9664
9665/*!
9666 \typedef QString::Data
9667 \internal
9668*/
9669
9670/*!
9671 \typedef QString::DataPtr
9672 \internal
9673*/
9674
9675/*!
9676 \fn DataPtr & QString::data_ptr()
9677 \internal
9678*/
9679
9680/*!
9681 \since 5.11
9682 \internal
9683 \relates QStringView
9684
9685 Returns \c true if the string is read right to left.
9686
9687 \sa QString::isRightToLeft()
9688*/
9689bool QtPrivate::isRightToLeft(QStringView string) noexcept
9690{
9691 int isolateLevel = 0;
9692
9693 for (QStringIterator i(string); i.hasNext();) {
9694 const char32_t c = i.next();
9695
9696 switch (QChar::direction(ucs4: c)) {
9697 case QChar::DirRLI:
9698 case QChar::DirLRI:
9699 case QChar::DirFSI:
9700 ++isolateLevel;
9701 break;
9702 case QChar::DirPDI:
9703 if (isolateLevel)
9704 --isolateLevel;
9705 break;
9706 case QChar::DirL:
9707 if (isolateLevel)
9708 break;
9709 return false;
9710 case QChar::DirR:
9711 case QChar::DirAL:
9712 if (isolateLevel)
9713 break;
9714 return true;
9715 case QChar::DirEN:
9716 case QChar::DirES:
9717 case QChar::DirET:
9718 case QChar::DirAN:
9719 case QChar::DirCS:
9720 case QChar::DirB:
9721 case QChar::DirS:
9722 case QChar::DirWS:
9723 case QChar::DirON:
9724 case QChar::DirLRE:
9725 case QChar::DirLRO:
9726 case QChar::DirRLE:
9727 case QChar::DirRLO:
9728 case QChar::DirPDF:
9729 case QChar::DirNSM:
9730 case QChar::DirBN:
9731 break;
9732 }
9733 }
9734 return false;
9735}
9736
9737qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9738{
9739 qsizetype num = 0;
9740 qsizetype i = -1;
9741 if (haystack.size() > 500 && needle.size() > 5) {
9742 QStringMatcher matcher(needle, cs);
9743 while ((i = matcher.indexIn(str: haystack, from: i + 1)) != -1)
9744 ++num;
9745 } else {
9746 while ((i = QtPrivate::findString(haystack, from: i + 1, needle, cs)) != -1)
9747 ++num;
9748 }
9749 return num;
9750}
9751
9752qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9753{
9754 if (cs == Qt::CaseSensitive)
9755 return std::count(first: haystack.cbegin(), last: haystack.cend(), value: needle);
9756
9757 needle = foldCase(ch: needle);
9758 return std::count_if(first: haystack.cbegin(), last: haystack.cend(),
9759 pred: [needle](const QChar c) { return foldAndCompare(a: c, b: needle); });
9760}
9761
9762qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9763{
9764 qsizetype num = 0;
9765 qsizetype i = -1;
9766
9767 QLatin1StringMatcher matcher(needle, cs);
9768 while ((i = matcher.indexIn(haystack, from: i + 1)) != -1)
9769 ++num;
9770
9771 return num;
9772}
9773
9774qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9775{
9776 if (haystack.size() < needle.size())
9777 return 0;
9778
9779 if (!QtPrivate::isLatin1(s: needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9780 return 0;
9781
9782 qsizetype num = 0;
9783 qsizetype i = -1;
9784
9785 QVarLengthArray<uchar> s(needle.size());
9786 qt_to_latin1_unchecked(dst: s.data(), src: needle.utf16(), length: needle.size());
9787
9788 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9789 cs);
9790 while ((i = matcher.indexIn(haystack, from: i + 1)) != -1)
9791 ++num;
9792
9793 return num;
9794}
9795
9796qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9797{
9798 if (haystack.size() < needle.size())
9799 return -1;
9800
9801 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(str: needle);
9802 return QtPrivate::count(haystack, needle: QStringView(s.data(), s.size()), cs);
9803}
9804
9805qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9806{
9807 // non-L1 needles cannot possibly match in L1-only haystacks
9808 if (needle.unicode() > 0xff)
9809 return 0;
9810
9811 if (cs == Qt::CaseSensitive) {
9812 return std::count(first: haystack.cbegin(), last: haystack.cend(), value: needle.toLatin1());
9813 } else {
9814 return std::count_if(first: haystack.cbegin(), last: haystack.cend(),
9815 pred: CaseInsensitiveL1::matcher(ch: needle.toLatin1()));
9816 }
9817}
9818
9819/*!
9820 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9821 \since 5.10
9822 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9823 \since 5.10
9824 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9825 \since 5.10
9826 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9827 \since 5.10
9828 \internal
9829 \relates QStringView
9830
9831 Returns \c true if \a haystack starts with \a needle,
9832 otherwise returns \c false.
9833
9834 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9835
9836 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9837*/
9838
9839bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9840{
9841 return qt_starts_with_impl(haystack, needle, cs);
9842}
9843
9844bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9845{
9846 return qt_starts_with_impl(haystack, needle, cs);
9847}
9848
9849bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9850{
9851 return qt_starts_with_impl(haystack, needle, cs);
9852}
9853
9854bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9855{
9856 return qt_starts_with_impl(haystack, needle, cs);
9857}
9858
9859/*!
9860 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9861 \since 5.10
9862 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9863 \since 5.10
9864 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9865 \since 5.10
9866 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9867 \since 5.10
9868 \internal
9869 \relates QStringView
9870
9871 Returns \c true if \a haystack ends with \a needle,
9872 otherwise returns \c false.
9873
9874 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9875
9876 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9877*/
9878
9879bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9880{
9881 return qt_ends_with_impl(haystack, needle, cs);
9882}
9883
9884bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9885{
9886 return qt_ends_with_impl(haystack, needle, cs);
9887}
9888
9889bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9890{
9891 return qt_ends_with_impl(haystack, needle, cs);
9892}
9893
9894bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9895{
9896 return qt_ends_with_impl(haystack, needle, cs);
9897}
9898
9899qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9900{
9901 const qsizetype l = haystack0.size();
9902 const qsizetype sl = needle0.size();
9903 if (sl == 1)
9904 return findString(str: haystack0, from, ch: needle0[0], cs);
9905 if (from < 0)
9906 from += l;
9907 if (std::size_t(sl + from) > std::size_t(l))
9908 return -1;
9909 if (!sl)
9910 return from;
9911 if (!l)
9912 return -1;
9913
9914 /*
9915 We use the Boyer-Moore algorithm in cases where the overhead
9916 for the skip table should pay off, otherwise we use a simple
9917 hash function.
9918 */
9919 if (l > 500 && sl > 5)
9920 return qFindStringBoyerMoore(haystack: haystack0, from, needle: needle0, cs);
9921
9922 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9923 /*
9924 We use some hashing for efficiency's sake. Instead of
9925 comparing strings, we compare the hash value of str with that
9926 of a part of this QString. Only if that matches, we call
9927 qt_string_compare().
9928 */
9929 const char16_t *needle = needle0.utf16();
9930 const char16_t *haystack = haystack0.utf16() + from;
9931 const char16_t *end = haystack0.utf16() + (l - sl);
9932 const qregisteruint sl_minus_1 = sl - 1;
9933 qregisteruint hashNeedle = 0, hashHaystack = 0;
9934 qsizetype idx;
9935
9936 if (cs == Qt::CaseSensitive) {
9937 for (idx = 0; idx < sl; ++idx) {
9938 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9939 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9940 }
9941 hashHaystack -= haystack[sl_minus_1];
9942
9943 while (haystack <= end) {
9944 hashHaystack += haystack[sl_minus_1];
9945 if (hashHaystack == hashNeedle
9946 && QtPrivate::compareStrings(lhs: needle0, rhs: sv(haystack), cs: Qt::CaseSensitive) == 0)
9947 return haystack - haystack0.utf16();
9948
9949 REHASH(*haystack);
9950 ++haystack;
9951 }
9952 } else {
9953 const char16_t *haystack_start = haystack0.utf16();
9954 for (idx = 0; idx < sl; ++idx) {
9955 hashNeedle = (hashNeedle<<1) + foldCase(ch: needle + idx, start: needle);
9956 hashHaystack = (hashHaystack<<1) + foldCase(ch: haystack + idx, start: haystack_start);
9957 }
9958 hashHaystack -= foldCase(ch: haystack + sl_minus_1, start: haystack_start);
9959
9960 while (haystack <= end) {
9961 hashHaystack += foldCase(ch: haystack + sl_minus_1, start: haystack_start);
9962 if (hashHaystack == hashNeedle
9963 && QtPrivate::compareStrings(lhs: needle0, rhs: sv(haystack), cs: Qt::CaseInsensitive) == 0)
9964 return haystack - haystack0.utf16();
9965
9966 REHASH(foldCase(haystack, haystack_start));
9967 ++haystack;
9968 }
9969 }
9970 return -1;
9971}
9972
9973qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9974{
9975 if (haystack.size() < needle.size())
9976 return -1;
9977
9978 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(str: needle);
9979 return QtPrivate::findString(haystack0: haystack, from, needle0: QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9980}
9981
9982qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9983{
9984 if (haystack.size() < needle.size())
9985 return -1;
9986
9987 if (!QtPrivate::isLatin1(s: needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9988 return -1;
9989
9990 if (needle.size() == 1) {
9991 const char n = needle.front().toLatin1();
9992 return QtPrivate::findString(haystack, from, needle: QLatin1StringView(&n, 1), cs);
9993 }
9994
9995 QVarLengthArray<char> s(needle.size());
9996 qt_to_latin1_unchecked(dst: reinterpret_cast<uchar *>(s.data()), src: needle.utf16(), length: needle.size());
9997 return QtPrivate::findString(haystack, from, needle: QLatin1StringView(s.data(), s.size()), cs);
9998}
9999
10000qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10001{
10002 if (from < 0)
10003 from += haystack.size();
10004 if (from < 0)
10005 return -1;
10006 qsizetype adjustedSize = haystack.size() - from;
10007 if (adjustedSize < needle.size())
10008 return -1;
10009 if (needle.size() == 0)
10010 return from;
10011
10012 if (cs == Qt::CaseSensitive) {
10013
10014 if (needle.size() == 1) {
10015 Q_ASSERT(haystack.data() != nullptr); // see size check above
10016 if (auto it = memchr(s: haystack.data() + from, c: needle.front().toLatin1(), n: adjustedSize))
10017 return static_cast<const char *>(it) - haystack.data();
10018 return -1;
10019 }
10020
10021 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
10022 return matcher.indexIn(haystack, from);
10023 }
10024
10025 // If the needle is sufficiently small we simply iteratively search through
10026 // the haystack. When the needle is too long we use a boyer-moore searcher
10027 // from the standard library, if available. If it is not available then the
10028 // QLatin1Strings are converted to QString and compared as such. Though
10029 // initialization is slower the boyer-moore search it employs still makes up
10030 // for it when haystack and needle are sufficiently long.
10031 // The needle size was chosen by testing various lengths using the
10032 // qstringtokenizer benchmark with the
10033 // "tokenize_qlatin1string_qlatin1string" test.
10034#ifdef Q_CC_MSVC
10035 const qsizetype threshold = 1;
10036#else
10037 const qsizetype threshold = 13;
10038#endif
10039 if (needle.size() <= threshold) {
10040 const auto begin = haystack.begin();
10041 const auto end = haystack.end() - needle.size() + 1;
10042 auto ciMatch = CaseInsensitiveL1::matcher(ch: needle[0].toLatin1());
10043 const qsizetype nlen1 = needle.size() - 1;
10044 for (auto it = std::find_if(first: begin + from, last: end, pred: ciMatch); it != end;
10045 it = std::find_if(first: it + 1, last: end, pred: ciMatch)) {
10046 // In this comparison we skip the first character because we know it's a match
10047 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(other: needle.sliced(pos: 1), cs) == 0)
10048 return std::distance(first: begin, last: it);
10049 }
10050 return -1;
10051 }
10052
10053 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
10054 return matcher.indexIn(haystack, from);
10055}
10056
10057qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
10058{
10059 return qLastIndexOf(haystack, needle: QChar(needle), from, cs);
10060}
10061
10062qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10063{
10064 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10065}
10066
10067qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10068{
10069 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10070}
10071
10072qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10073{
10074 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10075}
10076
10077qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10078{
10079 return qLastIndexOf(haystack0: haystack, from, needle0: needle, cs);
10080}
10081
10082#if QT_CONFIG(regularexpression)
10083qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10084{
10085 if (!re.isValid()) {
10086 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::indexOf");
10087 return -1;
10088 }
10089
10090 QRegularExpressionMatch match = stringHaystack
10091 ? re.match(subject: *stringHaystack, offset: from)
10092 : re.matchView(subjectView: viewHaystack, offset: from);
10093 if (match.hasMatch()) {
10094 const qsizetype ret = match.capturedStart();
10095 if (rmatch)
10096 *rmatch = std::move(match);
10097 return ret;
10098 }
10099
10100 return -1;
10101}
10102
10103qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10104{
10105 return indexOf(viewHaystack: haystack, stringHaystack: nullptr, re, from, rmatch);
10106}
10107
10108qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10109{
10110 if (!re.isValid()) {
10111 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::lastIndexOf");
10112 return -1;
10113 }
10114
10115 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10116 QRegularExpressionMatchIterator iterator = stringHaystack
10117 ? re.globalMatch(subject: *stringHaystack)
10118 : re.globalMatchView(subjectView: viewHaystack);
10119 qsizetype lastIndex = -1;
10120 while (iterator.hasNext()) {
10121 QRegularExpressionMatch match = iterator.next();
10122 qsizetype start = match.capturedStart();
10123 if (start < endpos) {
10124 lastIndex = start;
10125 if (rmatch)
10126 *rmatch = std::move(match);
10127 } else {
10128 break;
10129 }
10130 }
10131
10132 return lastIndex;
10133}
10134
10135qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10136{
10137 return lastIndexOf(viewHaystack: haystack, stringHaystack: nullptr, re, from, rmatch);
10138}
10139
10140bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10141{
10142 if (!re.isValid()) {
10143 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::contains");
10144 return false;
10145 }
10146 QRegularExpressionMatch m = stringHaystack
10147 ? re.match(subject: *stringHaystack)
10148 : re.matchView(subjectView: viewHaystack);
10149 bool hasMatch = m.hasMatch();
10150 if (hasMatch && rmatch)
10151 *rmatch = std::move(m);
10152 return hasMatch;
10153}
10154
10155bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10156{
10157 return contains(viewHaystack: haystack, stringHaystack: nullptr, re, rmatch);
10158}
10159
10160qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10161{
10162 if (!re.isValid()) {
10163 qtWarnAboutInvalidRegularExpression(pattern: re.pattern(), where: "QString(View)::count");
10164 return 0;
10165 }
10166 qsizetype count = 0;
10167 qsizetype index = -1;
10168 qsizetype len = haystack.size();
10169 while (index <= len - 1) {
10170 QRegularExpressionMatch match = re.matchView(subjectView: haystack, offset: index + 1);
10171 if (!match.hasMatch())
10172 break;
10173 count++;
10174
10175 // Search again, from the next character after the beginning of this
10176 // capture. If the capture starts with a surrogate pair, both together
10177 // count as "one character".
10178 index = match.capturedStart();
10179 if (index < len && haystack[index].isHighSurrogate())
10180 ++index;
10181 }
10182 return count;
10183}
10184
10185#endif // QT_CONFIG(regularexpression)
10186
10187/*!
10188 \since 5.0
10189
10190 Converts a plain text string to an HTML string with
10191 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10192 entities.
10193
10194 Example:
10195
10196 \snippet code/src_corelib_text_qstring.cpp 7
10197*/
10198QString QString::toHtmlEscaped() const
10199{
10200 const auto pos = std::u16string_view(*this).find_first_of(str: u"<>&\"");
10201 if (pos == std::u16string_view::npos)
10202 return *this;
10203 QString rich;
10204 const qsizetype len = size();
10205 rich.reserve(asize: qsizetype(len * 1.1));
10206 rich += qToStringViewIgnoringNull(s: *this).first(n: pos);
10207 for (auto ch : qToStringViewIgnoringNull(s: *this).sliced(pos)) {
10208 if (ch == u'<')
10209 rich += "&lt;"_L1;
10210 else if (ch == u'>')
10211 rich += "&gt;"_L1;
10212 else if (ch == u'&')
10213 rich += "&amp;"_L1;
10214 else if (ch == u'"')
10215 rich += "&quot;"_L1;
10216 else
10217 rich += ch;
10218 }
10219 rich.squeeze();
10220 return rich;
10221}
10222
10223/*!
10224 \macro QStringLiteral(str)
10225 \relates QString
10226
10227 The macro generates the data for a QString out of the string literal \a str
10228 at compile time. Creating a QString from it is free in this case, and the
10229 generated string data is stored in the read-only segment of the compiled
10230 object file.
10231
10232 If you have code that looks like this:
10233
10234 \snippet code/src_corelib_text_qstring.cpp 9
10235
10236 then a temporary QString will be created to be passed as the \c{hasAttribute}
10237 function parameter. This can be quite expensive, as it involves a memory
10238 allocation and the copy/conversion of the data into QString's internal
10239 encoding.
10240
10241 This cost can be avoided by using QStringLiteral instead:
10242
10243 \snippet code/src_corelib_text_qstring.cpp 10
10244
10245 In this case, QString's internal data will be generated at compile time; no
10246 conversion or allocation will occur at runtime.
10247
10248 Using QStringLiteral instead of a double quoted plain C++ string literal can
10249 significantly speed up creation of QString instances from data known at
10250 compile time.
10251
10252 \note QLatin1StringView can still be more efficient than QStringLiteral
10253 when the string is passed to a function that has an overload taking
10254 QLatin1StringView and this overload avoids conversion to QString. For
10255 instance, QString::operator==() can compare to a QLatin1StringView
10256 directly:
10257
10258 \snippet code/src_corelib_text_qstring.cpp 11
10259
10260 \note Some compilers have bugs encoding strings containing characters outside
10261 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10262 those cases. It is optional otherwise.
10263
10264 \sa QByteArrayLiteral
10265*/
10266
10267#if QT_DEPRECATED_SINCE(6, 8)
10268/*!
10269 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10270
10271 \relates QString
10272 \since 6.2
10273 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10274
10275 Literal operator that creates a QString out of the first \a size characters in
10276 the char16_t string literal \a str.
10277
10278 The QString is created at compile time, and the generated string data is stored
10279 in the read-only segment of the compiled object file. Duplicate literals may
10280 share the same read-only memory. This functionality is interchangeable with
10281 QStringLiteral, but saves typing when many string literals are present in the
10282 code.
10283
10284 The following code creates a QString:
10285 \code
10286 auto str = u"hello"_qs;
10287 \endcode
10288
10289 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10290*/
10291#endif // QT_DEPRECATED_SINCE(6, 8)
10292
10293/*!
10294 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10295
10296 \relates QString
10297 \since 6.4
10298
10299 Literal operator that creates a QString out of the first \a size characters in
10300 the char16_t string literal \a str.
10301
10302 The QString is created at compile time, and the generated string data is stored
10303 in the read-only segment of the compiled object file. Duplicate literals may
10304 share the same read-only memory. This functionality is interchangeable with
10305 QStringLiteral, but saves typing when many string literals are present in the
10306 code.
10307
10308 The following code creates a QString:
10309 \code
10310 using namespace Qt::Literals::StringLiterals;
10311
10312 auto str = u"hello"_s;
10313 \endcode
10314
10315 \sa Qt::Literals::StringLiterals
10316*/
10317
10318/*!
10319 \internal
10320 */
10321void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10322{
10323 qt_from_latin1(dst: reinterpret_cast<char16_t *>(out), str: in.data(), size: size_t(in.size()));
10324}
10325
10326/*!
10327 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10328 \relates QString
10329 \since 6.1
10330
10331 Removes all elements that compare equal to \a t from the
10332 string \a s. Returns the number of elements removed, if any.
10333
10334 \sa erase_if
10335*/
10336
10337/*!
10338 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10339 \relates QString
10340 \since 6.1
10341
10342 Removes all elements for which the predicate \a pred returns true
10343 from the string \a s. Returns the number of elements removed, if
10344 any.
10345
10346 \sa erase
10347*/
10348
10349/*!
10350 \macro const char *qPrintable(const QString &str)
10351 \relates QString
10352
10353 Returns \a str as a \c{const char *}. This is equivalent to
10354 \a{str}.toLocal8Bit().constData().
10355
10356 The char pointer will be invalid after the statement in which
10357 qPrintable() is used. This is because the array returned by
10358 QString::toLocal8Bit() will fall out of scope.
10359
10360 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10361 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10362 local 8-bit encoding. Therefore qUtf8Printable() should be used
10363 for logging strings instead of qPrintable().
10364
10365 \sa qUtf8Printable()
10366*/
10367
10368/*!
10369 \macro const char *qUtf8Printable(const QString &str)
10370 \relates QString
10371 \since 5.4
10372
10373 Returns \a str as a \c{const char *}. This is equivalent to
10374 \a{str}.toUtf8().constData().
10375
10376 The char pointer will be invalid after the statement in which
10377 qUtf8Printable() is used. This is because the array returned by
10378 QString::toUtf8() will fall out of scope.
10379
10380 Example:
10381
10382 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10383
10384 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10385*/
10386
10387/*!
10388 \macro const wchar_t *qUtf16Printable(const QString &str)
10389 \relates QString
10390 \since 5.7
10391
10392 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10393 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10394
10395 The only useful thing you can do with the return value of this macro is to
10396 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10397 the return value is \e{not} a valid \c{const wchar_t*}!
10398
10399 In general, the pointer will be invalid after the statement in which
10400 qUtf16Printable() is used. This is because the pointer may have been
10401 obtained from a temporary expression, which will fall out of scope.
10402
10403 Example:
10404
10405 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10406
10407 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10408*/
10409
10410QT_END_NAMESPACE
10411
10412#undef REHASH
10413

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of qtbase/src/corelib/text/qstring.cpp