| 1 | // Copyright (C) 2016 Intel Corporation. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #ifndef QSTRINGALGORITHMS_P_H |
| 5 | #define QSTRINGALGORITHMS_P_H |
| 6 | |
| 7 | // |
| 8 | // W A R N I N G |
| 9 | // ------------- |
| 10 | // |
| 11 | // This file is not part of the Qt API. It exists for the convenience |
| 12 | // of internal files. This header file may change from version to version |
| 13 | // without notice, or even be removed. |
| 14 | // |
| 15 | // We mean it. |
| 16 | // |
| 17 | |
| 18 | #include "qspan.h" |
| 19 | #include "qstring.h" |
| 20 | #include "qlocale_p.h" // for ascii_isspace |
| 21 | |
| 22 | QT_BEGIN_NAMESPACE |
| 23 | |
| 24 | template <typename StringType> struct QStringAlgorithms |
| 25 | { |
| 26 | typedef typename StringType::value_type Char; |
| 27 | typedef typename StringType::size_type size_type; |
| 28 | typedef typename std::remove_cv<StringType>::type NakedStringType; |
| 29 | using ViewType = |
| 30 | std::conditional_t<std::is_same_v<StringType, QString>, QStringView, QByteArrayView>; |
| 31 | using ViewChar = typename ViewType::storage_type; |
| 32 | static const bool isConst = std::is_const<StringType>::value; |
| 33 | |
| 34 | static inline bool isSpace(char ch) { return ascii_isspace(c: ch); } |
| 35 | static inline bool isSpace(QChar ch) { return ch.isSpace(); } |
| 36 | |
| 37 | // Surrogate pairs are not handled in either of the functions below. That is |
| 38 | // not a problem because there are no space characters (Zs, Zl, Zp) outside the |
| 39 | // Basic Multilingual Plane. |
| 40 | |
| 41 | static inline StringType trimmed_helper_inplace(NakedStringType &str, const Char *begin, const Char *end) |
| 42 | { |
| 43 | // in-place trimming: |
| 44 | Char *data = const_cast<Char *>(str.cbegin()); |
| 45 | if (begin != data) |
| 46 | memmove(data, begin, (end - begin) * sizeof(Char)); |
| 47 | str.resize(end - begin); |
| 48 | return std::move(str); |
| 49 | } |
| 50 | |
| 51 | static inline StringType trimmed_helper_inplace(const NakedStringType &, const Char *, const Char *) |
| 52 | { |
| 53 | // can't happen |
| 54 | Q_UNREACHABLE_RETURN(StringType()); |
| 55 | } |
| 56 | |
| 57 | struct TrimPositions { |
| 58 | const Char *begin; |
| 59 | const Char *end; |
| 60 | }; |
| 61 | // Returns {begin, end} where: |
| 62 | // - "begin" refers to the first non-space character |
| 63 | // - if there is a sequence of one or more space chacaters at the end, |
| 64 | // "end" refers to the first character in that sequence, otherwise |
| 65 | // "end" is str.cend() |
| 66 | [[nodiscard]] static TrimPositions trimmed_helper_positions(const StringType &str) |
| 67 | { |
| 68 | const Char *begin = str.cbegin(); |
| 69 | const Char *end = str.cend(); |
| 70 | // skip white space from end |
| 71 | while (begin < end && isSpace(end[-1])) |
| 72 | --end; |
| 73 | // skip white space from start |
| 74 | while (begin < end && isSpace(*begin)) |
| 75 | begin++; |
| 76 | return {begin, end}; |
| 77 | } |
| 78 | |
| 79 | static inline StringType trimmed_helper(StringType &str) |
| 80 | { |
| 81 | const auto [begin, end] = trimmed_helper_positions(str); |
| 82 | if (begin == str.cbegin() && end == str.cend()) |
| 83 | return str; |
| 84 | if (!isConst && str.isDetached()) |
| 85 | return trimmed_helper_inplace(str, begin, end); |
| 86 | return StringType(begin, end - begin); |
| 87 | } |
| 88 | |
| 89 | static inline StringType simplified_helper(StringType &str) |
| 90 | { |
| 91 | if (str.isEmpty()) |
| 92 | return str; |
| 93 | const Char *src = str.cbegin(); |
| 94 | const Char *end = str.cend(); |
| 95 | NakedStringType result = isConst || !str.isDetached() ? |
| 96 | StringType(str.size(), Qt::Uninitialized) : |
| 97 | std::move(str); |
| 98 | |
| 99 | Char *dst = const_cast<Char *>(result.cbegin()); |
| 100 | Char *ptr = dst; |
| 101 | bool unmodified = true; |
| 102 | while (true) { |
| 103 | while (src != end && isSpace(*src)) |
| 104 | ++src; |
| 105 | while (src != end && !isSpace(*src)) |
| 106 | *ptr++ = *src++; |
| 107 | if (src == end) |
| 108 | break; |
| 109 | if (*src != QChar::Space) |
| 110 | unmodified = false; |
| 111 | *ptr++ = QChar::Space; |
| 112 | } |
| 113 | if (ptr != dst && ptr[-1] == QChar::Space) |
| 114 | --ptr; |
| 115 | |
| 116 | qsizetype newlen = ptr - dst; |
| 117 | if (isConst && newlen == str.size() && unmodified) { |
| 118 | // nothing happened, return the original |
| 119 | return str; |
| 120 | } |
| 121 | result.resize(newlen); |
| 122 | return result; |
| 123 | } |
| 124 | |
| 125 | static inline bool needsReallocate(const StringType &str, qsizetype newSize) noexcept |
| 126 | { |
| 127 | const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin(); |
| 128 | return newSize > capacityAtEnd; |
| 129 | } |
| 130 | |
| 131 | static inline const ViewChar *asUnicodeChar(ViewType v) |
| 132 | { |
| 133 | if constexpr (sizeof(ViewChar) == sizeof(QChar)) |
| 134 | return v.utf16(); |
| 135 | else |
| 136 | return v.data(); |
| 137 | } |
| 138 | |
| 139 | static inline qsizetype newSize(StringType &src, qsizetype bsize, |
| 140 | ViewType after, QSpan<const qsizetype> indices) |
| 141 | { |
| 142 | if (bsize == after.size()) |
| 143 | return src.size(); |
| 144 | else if (bsize > after.size()) // shrink |
| 145 | return src.size() - indices.size() * (bsize - after.size()); |
| 146 | |
| 147 | // bsize < after.size() |
| 148 | const qsizetype adjust = indices.size() * (after.size() - bsize); |
| 149 | return src.size() + adjust; |
| 150 | } |
| 151 | |
| 152 | // {QString,QByteArray}::resize() but without the extra checks |
| 153 | static inline void setSize(StringType &src, qsizetype newSize) |
| 154 | { |
| 155 | Q_ASSERT(src.isDetached()); |
| 156 | Q_ASSERT(newSize <= src.capacity()); |
| 157 | |
| 158 | auto &d = src.data_ptr(); |
| 159 | d.size = newSize; |
| 160 | d.data()[newSize] = '\0'; |
| 161 | } |
| 162 | |
| 163 | // Instead of detaching, i.e. copying the whole data array then performing the |
| 164 | // replacement, create a new buffer, copy `src` and `after` to it and swap it |
| 165 | // with `src`. |
| 166 | static inline void replace_into_copy(StringType &src, qsizetype bsize, |
| 167 | ViewType after, QSpan<const qsizetype> indices, |
| 168 | qsizetype newlen) |
| 169 | { |
| 170 | StringType tmp{newlen, Qt::Uninitialized}; |
| 171 | auto *to = tmp.data_ptr().data(); |
| 172 | const auto *a = asUnicodeChar(v: after); |
| 173 | auto *const begin = src.data_ptr().data(); |
| 174 | auto *first = begin; |
| 175 | |
| 176 | for (auto i : indices) { |
| 177 | to = std::copy(first, begin + i, to); |
| 178 | to = std::copy(a, a + after.size(), to); |
| 179 | first = begin + i + bsize; |
| 180 | } |
| 181 | std::copy(first, src.data_ptr().end(), to); // remainder |
| 182 | src.swap(tmp); |
| 183 | } |
| 184 | |
| 185 | static inline void replace_equal_len(StringType &src, [[maybe_unused]] qsizetype bsize, |
| 186 | ViewType after, QSpan<const qsizetype> indices) |
| 187 | { |
| 188 | Q_ASSERT(bsize == after.size()); |
| 189 | Q_ASSERT(!src.data_ptr().needsDetach()); |
| 190 | |
| 191 | const auto *a = asUnicodeChar(v: after); |
| 192 | auto *const begin = src.data_ptr().data(); |
| 193 | // before and after have the same length, so no reallocation |
| 194 | for (auto i : indices) |
| 195 | std::copy(a, a + after.size(), begin + i); |
| 196 | } |
| 197 | |
| 198 | static inline void replace_shrink(StringType &src, qsizetype bsize, ViewType after, |
| 199 | QSpan<const qsizetype> indices) |
| 200 | { |
| 201 | Q_ASSERT(bsize > after.size()); |
| 202 | Q_ASSERT(!src.data_ptr().needsDetach()); |
| 203 | |
| 204 | const auto *a = asUnicodeChar(v: after); |
| 205 | auto *const begin = src.data_ptr().data(); // data(), without the detach() check |
| 206 | auto *const end = begin + src.size(); |
| 207 | Q_ASSERT(!indices.isEmpty()); |
| 208 | auto *to = std::copy(a, a + after.size(), begin + indices.front()); |
| 209 | auto *first = begin + indices.front() + bsize; |
| 210 | for (qsizetype i = 1; i < indices.size(); ++i) { |
| 211 | auto *last = begin + indices[i]; |
| 212 | to = std::copy(first, last, to); |
| 213 | qsizetype adjust = i * (bsize - after.size()); |
| 214 | to = std::copy(a, a + after.size(), last - adjust); |
| 215 | first = begin + indices[i] + bsize; |
| 216 | } |
| 217 | to = std::copy(first, end, to); |
| 218 | setSize(src,newSize: to - begin); |
| 219 | } |
| 220 | |
| 221 | static inline void replace_grow(StringType &src, qsizetype bsize, ViewType after, |
| 222 | QSpan<const qsizetype> indices, qsizetype newlen) |
| 223 | { |
| 224 | Q_ASSERT(after.size() > bsize); |
| 225 | Q_ASSERT(!src.data_ptr().needsDetach()); |
| 226 | |
| 227 | // replace in-place, after is longer than before, so replace from the back |
| 228 | const qsizetype oldlen = src.size(); |
| 229 | const auto *a = asUnicodeChar(v: after); |
| 230 | setSize(src, newSize: newlen); |
| 231 | auto *const begin = src.data_ptr().data(); // data(), without the detach() check |
| 232 | auto *last = begin + oldlen; |
| 233 | auto *to = src.data_ptr().end(); |
| 234 | for (auto i = indices.size() - 1; i >= 0; --i) { |
| 235 | auto *first = begin + indices[i] + bsize; |
| 236 | to = std::copy_backward(first, last, to); |
| 237 | to = std::copy_backward(a, a + after.size(), to); |
| 238 | last = begin + indices[i]; |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | static inline void replace_helper(StringType &src, qsizetype bsize, ViewType after, |
| 243 | QSpan<const qsizetype> indices) |
| 244 | { |
| 245 | if (indices.isEmpty()) |
| 246 | return; |
| 247 | |
| 248 | const qsizetype newlen = newSize(src, bsize, after, indices); |
| 249 | if (src.data_ptr().needsDetach() |
| 250 | || (bsize < after.size() && needsReallocate(str: src, newSize: newlen))) { |
| 251 | // Instead of detaching (which would copy the whole data array) then |
| 252 | // performing the replacement, allocate a new string and copy the data |
| 253 | // over from `src` and `after` as needed. |
| 254 | replace_into_copy(src, bsize, after, indices, newlen); |
| 255 | return; |
| 256 | } |
| 257 | |
| 258 | // No detaching or reallocation -> change in-place |
| 259 | if (bsize == after.size()) |
| 260 | replace_equal_len(src, bsize, after, indices); |
| 261 | else if (bsize > after.size()) |
| 262 | replace_shrink(src, bsize, after, indices); |
| 263 | else // bsize < after.size() |
| 264 | replace_grow(src, bsize, after, indices, newlen); |
| 265 | } |
| 266 | }; |
| 267 | |
| 268 | QT_END_NAMESPACE |
| 269 | |
| 270 | #endif // QSTRINGALGORITHMS_P_H |
| 271 | |