| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | // Qt-Security score:critical reason:data-parser |
| 4 | |
| 5 | #include "qbytearraymatcher.h" |
| 6 | |
| 7 | #include <qtconfiginclude.h> |
| 8 | #ifndef QT_BOOTSTRAPPED |
| 9 | # include <private/qtcore-config_p.h> |
| 10 | #endif |
| 11 | |
| 12 | #include <limits.h> |
| 13 | |
| 14 | QT_BEGIN_NAMESPACE |
| 15 | |
| 16 | static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable) |
| 17 | { |
| 18 | int l = int(qMin(a: len, b: qsizetype(255))); |
| 19 | memset(s: skiptable, c: l, n: 256*sizeof(uchar)); |
| 20 | cc += len - l; |
| 21 | while (l--) |
| 22 | skiptable[*cc++] = l; |
| 23 | } |
| 24 | |
| 25 | static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc, |
| 26 | qsizetype pl, const uchar *skiptable) |
| 27 | { |
| 28 | if (pl == 0) |
| 29 | return index > l ? -1 : index; |
| 30 | const qsizetype pl_minus_one = pl - 1; |
| 31 | |
| 32 | const uchar *current = cc + index + pl_minus_one; |
| 33 | const uchar *end = cc + l; |
| 34 | while (current < end) { |
| 35 | qsizetype skip = skiptable[*current]; |
| 36 | if (!skip) { |
| 37 | // possible match |
| 38 | while (skip < pl) { |
| 39 | if (*(current - skip) != puc[pl_minus_one - skip]) |
| 40 | break; |
| 41 | skip++; |
| 42 | } |
| 43 | if (skip > pl_minus_one) // we have a match |
| 44 | return (current - cc) - skip + 1; |
| 45 | |
| 46 | // in case we don't have a match we are a bit inefficient as we only skip by one |
| 47 | // when we have the non matching char in the string. |
| 48 | if (skiptable[*(current - skip)] == pl) |
| 49 | skip = pl - skip; |
| 50 | else |
| 51 | skip = 1; |
| 52 | } |
| 53 | if (current > end - skip) |
| 54 | break; |
| 55 | current += skip; |
| 56 | } |
| 57 | return -1; // not found |
| 58 | } |
| 59 | |
| 60 | /*! \class QByteArrayMatcher |
| 61 | \inmodule QtCore |
| 62 | \brief The QByteArrayMatcher class holds a sequence of bytes that |
| 63 | can be quickly matched in a byte array. |
| 64 | |
| 65 | \ingroup tools |
| 66 | \ingroup string-processing |
| 67 | |
| 68 | This class is useful when you have a sequence of bytes that you |
| 69 | want to repeatedly match against some byte arrays (perhaps in a |
| 70 | loop), or when you want to search for the same sequence of bytes |
| 71 | multiple times in the same byte array. Using a matcher object and |
| 72 | indexIn() is faster than matching a plain QByteArray with |
| 73 | QByteArray::indexOf() if repeated matching takes place. This |
| 74 | class offers no benefit if you are doing one-off byte array |
| 75 | matches. |
| 76 | |
| 77 | Create the QByteArrayMatcher with the QByteArray you want to |
| 78 | search for. Then call indexIn() on the QByteArray that you want to |
| 79 | search. |
| 80 | |
| 81 | \sa QByteArray, QStringMatcher |
| 82 | */ |
| 83 | |
| 84 | /*! |
| 85 | Constructs an empty byte array matcher that won't match anything. |
| 86 | Call setPattern() to give it a pattern to match. |
| 87 | */ |
| 88 | QByteArrayMatcher::QByteArrayMatcher() |
| 89 | : d(nullptr) |
| 90 | { |
| 91 | p.p = nullptr; |
| 92 | p.l = 0; |
| 93 | memset(s: p.q_skiptable, c: 0, n: sizeof(p.q_skiptable)); |
| 94 | } |
| 95 | |
| 96 | /*! |
| 97 | Constructs a byte array matcher from \a pattern. \a pattern |
| 98 | has the given \a length. Call indexIn() to perform a search. |
| 99 | |
| 100 | \note the data that \a pattern is referencing must remain valid while this |
| 101 | object is used. |
| 102 | */ |
| 103 | QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr) |
| 104 | { |
| 105 | p.p = reinterpret_cast<const uchar *>(pattern); |
| 106 | if (length < 0) |
| 107 | p.l = qstrlen(str: pattern); |
| 108 | else |
| 109 | p.l = length; |
| 110 | bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable); |
| 111 | } |
| 112 | |
| 113 | /*! |
| 114 | Constructs a byte array matcher that will search for \a pattern. |
| 115 | Call indexIn() to perform a search. |
| 116 | */ |
| 117 | QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern) |
| 118 | : d(nullptr), q_pattern(pattern) |
| 119 | { |
| 120 | p.p = reinterpret_cast<const uchar *>(pattern.constData()); |
| 121 | p.l = pattern.size(); |
| 122 | bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable); |
| 123 | } |
| 124 | |
| 125 | /*! |
| 126 | \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern) |
| 127 | \since 6.3 |
| 128 | \overload |
| 129 | |
| 130 | Constructs a byte array matcher that will search for \a pattern. |
| 131 | Call indexIn() to perform a search. |
| 132 | |
| 133 | \note the data that \a pattern is referencing must remain valid while this |
| 134 | object is used. |
| 135 | */ |
| 136 | |
| 137 | /*! |
| 138 | Copies the \a other byte array matcher to this byte array matcher. |
| 139 | */ |
| 140 | QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other) |
| 141 | : d(nullptr) |
| 142 | { |
| 143 | operator=(other); |
| 144 | } |
| 145 | |
| 146 | /*! |
| 147 | Destroys the byte array matcher. |
| 148 | */ |
| 149 | QByteArrayMatcher::~QByteArrayMatcher() |
| 150 | { |
| 151 | Q_UNUSED(d); |
| 152 | } |
| 153 | |
| 154 | /*! |
| 155 | Assigns the \a other byte array matcher to this byte array matcher. |
| 156 | */ |
| 157 | QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other) |
| 158 | { |
| 159 | q_pattern = other.q_pattern; |
| 160 | memcpy(dest: &p, src: &other.p, n: sizeof(p)); |
| 161 | return *this; |
| 162 | } |
| 163 | |
| 164 | /*! |
| 165 | Sets the byte array that this byte array matcher will search for |
| 166 | to \a pattern. |
| 167 | |
| 168 | \sa pattern(), indexIn() |
| 169 | */ |
| 170 | void QByteArrayMatcher::setPattern(const QByteArray &pattern) |
| 171 | { |
| 172 | q_pattern = pattern; |
| 173 | p.p = reinterpret_cast<const uchar *>(pattern.constData()); |
| 174 | p.l = pattern.size(); |
| 175 | bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable); |
| 176 | } |
| 177 | |
| 178 | /*! |
| 179 | Searches the char string \a str, which has length \a len, from |
| 180 | byte position \a from (default 0, i.e. from the first byte), for |
| 181 | the byte array pattern() that was set in the constructor or in the |
| 182 | most recent call to setPattern(). Returns the position where the |
| 183 | pattern() matched in \a str, or -1 if no match was found. |
| 184 | */ |
| 185 | qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const |
| 186 | { |
| 187 | if (from < 0) |
| 188 | from = 0; |
| 189 | return bm_find(cc: reinterpret_cast<const uchar *>(str), l: len, index: from, |
| 190 | puc: p.p, pl: p.l, skiptable: p.q_skiptable); |
| 191 | } |
| 192 | |
| 193 | /*! |
| 194 | \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const |
| 195 | \since 6.3 |
| 196 | \overload |
| 197 | |
| 198 | Searches the byte array \a data, from byte position \a from (default |
| 199 | 0, i.e. from the first byte), for the byte array pattern() that |
| 200 | was set in the constructor or in the most recent call to |
| 201 | setPattern(). Returns the position where the pattern() matched in |
| 202 | \a data, or -1 if no match was found. |
| 203 | */ |
| 204 | qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const |
| 205 | { |
| 206 | if (from < 0) |
| 207 | from = 0; |
| 208 | return bm_find(cc: reinterpret_cast<const uchar *>(data.data()), l: data.size(), index: from, |
| 209 | puc: p.p, pl: p.l, skiptable: p.q_skiptable); |
| 210 | } |
| 211 | |
| 212 | /*! |
| 213 | \fn QByteArray QByteArrayMatcher::pattern() const |
| 214 | |
| 215 | Returns the byte array pattern that this byte array matcher will |
| 216 | search for. |
| 217 | |
| 218 | \sa setPattern() |
| 219 | */ |
| 220 | |
| 221 | /*! |
| 222 | \internal |
| 223 | */ |
| 224 | Q_NEVER_INLINE |
| 225 | static qsizetype qFindByteArrayBoyerMoore( |
| 226 | const char *haystack, qsizetype haystackLen, qsizetype haystackOffset, |
| 227 | const char *needle, qsizetype needleLen) |
| 228 | { |
| 229 | uchar skiptable[256]; |
| 230 | bm_init_skiptable(cc: (const uchar *)needle, len: needleLen, skiptable); |
| 231 | if (haystackOffset < 0) |
| 232 | haystackOffset = 0; |
| 233 | return bm_find(cc: (const uchar *)haystack, l: haystackLen, index: haystackOffset, |
| 234 | puc: (const uchar *)needle, pl: needleLen, skiptable); |
| 235 | } |
| 236 | |
| 237 | /*! |
| 238 | \internal |
| 239 | */ |
| 240 | static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, |
| 241 | const char *needle, qsizetype sl); |
| 242 | qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept |
| 243 | { |
| 244 | const auto haystack0 = haystack.data(); |
| 245 | const auto l = haystack.size(); |
| 246 | const auto sl = needle.size(); |
| 247 | #if !QT_CONFIG(memmem) |
| 248 | if (sl == 1) |
| 249 | return findByteArray(haystack, from, needle.front()); |
| 250 | #endif |
| 251 | |
| 252 | if (from < 0) |
| 253 | from += l; |
| 254 | if (std::size_t(sl + from) > std::size_t(l)) |
| 255 | return -1; |
| 256 | if (!sl) |
| 257 | return from; |
| 258 | if (!l) |
| 259 | return -1; |
| 260 | |
| 261 | #if QT_CONFIG(memmem) |
| 262 | auto where = memmem(haystack: haystack0 + from, haystacklen: l - from, needle: needle.data(), needlelen: sl); |
| 263 | return where ? static_cast<const char *>(where) - haystack0 : -1; |
| 264 | #endif |
| 265 | |
| 266 | /* |
| 267 | We use the Boyer-Moore algorithm in cases where the overhead |
| 268 | for the skip table should pay off, otherwise we use a simple |
| 269 | hash function. |
| 270 | */ |
| 271 | if (l > 500 && sl > 5) |
| 272 | return qFindByteArrayBoyerMoore(haystack: haystack0, haystackLen: l, haystackOffset: from, needle: needle.data(), needleLen: sl); |
| 273 | return qFindByteArray(haystack0, l, from, needle: needle.data(), sl); |
| 274 | } |
| 275 | |
| 276 | qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, |
| 277 | const char *needle, qsizetype sl) |
| 278 | { |
| 279 | /* |
| 280 | We use some hashing for efficiency's sake. Instead of |
| 281 | comparing strings, we compare the hash value of str with that |
| 282 | of a part of this QByteArray. Only if that matches, we call memcmp(). |
| 283 | */ |
| 284 | const char *haystack = haystack0 + from; |
| 285 | const char *end = haystack0 + (l - sl); |
| 286 | const qregisteruint sl_minus_1 = sl - 1; |
| 287 | qregisteruint hashNeedle = 0, hashHaystack = 0; |
| 288 | qsizetype idx; |
| 289 | for (idx = 0; idx < sl; ++idx) { |
| 290 | hashNeedle = ((hashNeedle<<1) + needle[idx]); |
| 291 | hashHaystack = ((hashHaystack<<1) + haystack[idx]); |
| 292 | } |
| 293 | hashHaystack -= *(haystack + sl_minus_1); |
| 294 | |
| 295 | while (haystack <= end) { |
| 296 | hashHaystack += *(haystack + sl_minus_1); |
| 297 | if (hashHaystack == hashNeedle && *needle == *haystack |
| 298 | && memcmp(s1: needle, s2: haystack, n: sl) == 0) |
| 299 | return haystack - haystack0; |
| 300 | |
| 301 | if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) |
| 302 | hashHaystack -= qregisteruint(*haystack) << sl_minus_1; |
| 303 | hashHaystack <<= 1; |
| 304 | ++haystack; |
| 305 | } |
| 306 | return -1; |
| 307 | } |
| 308 | |
| 309 | /*! |
| 310 | \class QStaticByteArrayMatcherBase |
| 311 | \since 5.9 |
| 312 | \internal |
| 313 | \brief Non-template base class of QStaticByteArrayMatcher. |
| 314 | */ |
| 315 | |
| 316 | /*! |
| 317 | \class QStaticByteArrayMatcher |
| 318 | \since 5.9 |
| 319 | \inmodule QtCore |
| 320 | \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher. |
| 321 | |
| 322 | \ingroup tools |
| 323 | \ingroup string-processing |
| 324 | |
| 325 | This class is useful when you have a sequence of bytes that you |
| 326 | want to repeatedly match against some byte arrays (perhaps in a |
| 327 | loop), or when you want to search for the same sequence of bytes |
| 328 | multiple times in the same byte array. Using a matcher object and |
| 329 | indexIn() is faster than matching a plain QByteArray with |
| 330 | QByteArray::indexOf(), in particular if repeated matching takes place. |
| 331 | |
| 332 | Unlike QByteArrayMatcher, this class calculates the internal |
| 333 | representation at \e{compile-time}, so it can |
| 334 | even benefit if you are doing one-off byte array matches. |
| 335 | |
| 336 | Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(), |
| 337 | passing it the C string literal you want to search for. Store the return |
| 338 | value of that function in a \c{static const auto} variable, so you don't need |
| 339 | to pass the \c{N} template parameter explicitly: |
| 340 | |
| 341 | \snippet code/src_corelib_text_qbytearraymatcher.cpp 0 |
| 342 | |
| 343 | Then call indexIn() on the QByteArray in which you want to search, just like |
| 344 | with QByteArrayMatcher. |
| 345 | |
| 346 | Since this class is designed to do all the up-front calculations at compile-time, |
| 347 | it does not offer a setPattern() method. |
| 348 | |
| 349 | \sa QByteArrayMatcher, QStringMatcher |
| 350 | */ |
| 351 | |
| 352 | /*! |
| 353 | \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const |
| 354 | |
| 355 | Searches the char string \a haystack, which has length \a hlen, from |
| 356 | byte position \a from (default 0, i.e. from the first byte), for |
| 357 | the byte array pattern() that was set in the constructor. |
| 358 | |
| 359 | Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. |
| 360 | */ |
| 361 | |
| 362 | /*! |
| 363 | \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const |
| 364 | |
| 365 | Searches the char string \a haystack, from byte position \a from |
| 366 | (default 0, i.e. from the first byte), for the byte array pattern() |
| 367 | that was set in the constructor. |
| 368 | |
| 369 | Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. |
| 370 | */ |
| 371 | |
| 372 | /*! |
| 373 | \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const |
| 374 | |
| 375 | Returns the byte array pattern that this byte array matcher will |
| 376 | search for. |
| 377 | |
| 378 | \sa QByteArrayMatcher::setPattern() |
| 379 | */ |
| 380 | |
| 381 | /*! |
| 382 | \internal |
| 383 | */ |
| 384 | qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept |
| 385 | { |
| 386 | if (from < 0) |
| 387 | from = 0; |
| 388 | return bm_find(cc: reinterpret_cast<const uchar *>(haystack), l: hlen, index: from, |
| 389 | puc: reinterpret_cast<const uchar *>(needle), pl: nlen, skiptable: m_skiptable.data); |
| 390 | } |
| 391 | |
| 392 | /*! |
| 393 | \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) |
| 394 | \internal |
| 395 | */ |
| 396 | |
| 397 | /*! |
| 398 | \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) |
| 399 | \since 5.9 |
| 400 | \relates QStaticByteArrayMatcher |
| 401 | |
| 402 | Return a QStaticByteArrayMatcher with the correct \c{N} determined |
| 403 | automatically from the \a pattern passed. |
| 404 | |
| 405 | To take full advantage of this function, assign the result to an |
| 406 | \c{auto} variable: |
| 407 | |
| 408 | \snippet code/src_corelib_text_qbytearraymatcher.cpp 1 |
| 409 | */ |
| 410 | |
| 411 | QT_END_NAMESPACE |
| 412 | |