1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qbytearraymatcher.h"
41
42#include <limits.h>
43
44QT_BEGIN_NAMESPACE
45
46static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
47{
48 int l = qMin(a: len, b: 255);
49 memset(s: skiptable, c: l, n: 256*sizeof(uchar));
50 cc += len - l;
51 while (l--)
52 skiptable[*cc++] = l;
53}
54
55static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl,
56 const uchar *skiptable)
57{
58 if (pl == 0)
59 return index > l ? -1 : index;
60 const uint pl_minus_one = pl - 1;
61
62 const uchar *current = cc + index + pl_minus_one;
63 const uchar *end = cc + l;
64 while (current < end) {
65 uint skip = skiptable[*current];
66 if (!skip) {
67 // possible match
68 while (skip < pl) {
69 if (*(current - skip) != puc[pl_minus_one - skip])
70 break;
71 skip++;
72 }
73 if (skip > pl_minus_one) // we have a match
74 return (current - cc) - skip + 1;
75
76 // in case we don't have a match we are a bit inefficient as we only skip by one
77 // when we have the non matching char in the string.
78 if (skiptable[*(current - skip)] == pl)
79 skip = pl - skip;
80 else
81 skip = 1;
82 }
83 if (current > end - skip)
84 break;
85 current += skip;
86 }
87 return -1; // not found
88}
89
90/*! \class QByteArrayMatcher
91 \inmodule QtCore
92 \brief The QByteArrayMatcher class holds a sequence of bytes that
93 can be quickly matched in a byte array.
94
95 \ingroup tools
96 \ingroup string-processing
97
98 This class is useful when you have a sequence of bytes that you
99 want to repeatedly match against some byte arrays (perhaps in a
100 loop), or when you want to search for the same sequence of bytes
101 multiple times in the same byte array. Using a matcher object and
102 indexIn() is faster than matching a plain QByteArray with
103 QByteArray::indexOf() if repeated matching takes place. This
104 class offers no benefit if you are doing one-off byte array
105 matches.
106
107 Create the QByteArrayMatcher with the QByteArray you want to
108 search for. Then call indexIn() on the QByteArray that you want to
109 search.
110
111 \sa QByteArray, QStringMatcher
112*/
113
114/*!
115 Constructs an empty byte array matcher that won't match anything.
116 Call setPattern() to give it a pattern to match.
117*/
118QByteArrayMatcher::QByteArrayMatcher()
119 : d(nullptr)
120{
121 p.p = nullptr;
122 p.l = 0;
123 memset(s: p.q_skiptable, c: 0, n: sizeof(p.q_skiptable));
124}
125
126/*!
127 Constructs a byte array matcher from \a pattern. \a pattern
128 has the given \a length. \a pattern must remain in scope, but
129 the destructor does not delete \a pattern.
130 */
131QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length)
132 : d(nullptr)
133{
134 p.p = reinterpret_cast<const uchar *>(pattern);
135 p.l = length;
136 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
137}
138
139/*!
140 Constructs a byte array matcher that will search for \a pattern.
141 Call indexIn() to perform a search.
142*/
143QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
144 : d(nullptr), q_pattern(pattern)
145{
146 p.p = reinterpret_cast<const uchar *>(pattern.constData());
147 p.l = pattern.size();
148 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
149}
150
151/*!
152 Copies the \a other byte array matcher to this byte array matcher.
153*/
154QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
155 : d(nullptr)
156{
157 operator=(other);
158}
159
160/*!
161 Destroys the byte array matcher.
162*/
163QByteArrayMatcher::~QByteArrayMatcher()
164{
165 Q_UNUSED(d);
166}
167
168/*!
169 Assigns the \a other byte array matcher to this byte array matcher.
170*/
171QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
172{
173 q_pattern = other.q_pattern;
174 memcpy(dest: &p, src: &other.p, n: sizeof(p));
175 return *this;
176}
177
178/*!
179 Sets the byte array that this byte array matcher will search for
180 to \a pattern.
181
182 \sa pattern(), indexIn()
183*/
184void QByteArrayMatcher::setPattern(const QByteArray &pattern)
185{
186 q_pattern = pattern;
187 p.p = reinterpret_cast<const uchar *>(pattern.constData());
188 p.l = pattern.size();
189 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
190}
191
192/*!
193 Searches the byte array \a ba, from byte position \a from (default
194 0, i.e. from the first byte), for the byte array pattern() that
195 was set in the constructor or in the most recent call to
196 setPattern(). Returns the position where the pattern() matched in
197 \a ba, or -1 if no match was found.
198*/
199int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const
200{
201 if (from < 0)
202 from = 0;
203 return bm_find(cc: reinterpret_cast<const uchar *>(ba.constData()), l: ba.size(), index: from,
204 puc: p.p, pl: p.l, skiptable: p.q_skiptable);
205}
206
207/*!
208 Searches the char string \a str, which has length \a len, from
209 byte position \a from (default 0, i.e. from the first byte), for
210 the byte array pattern() that was set in the constructor or in the
211 most recent call to setPattern(). Returns the position where the
212 pattern() matched in \a str, or -1 if no match was found.
213*/
214int QByteArrayMatcher::indexIn(const char *str, int len, int from) const
215{
216 if (from < 0)
217 from = 0;
218 return bm_find(cc: reinterpret_cast<const uchar *>(str), l: len, index: from,
219 puc: p.p, pl: p.l, skiptable: p.q_skiptable);
220}
221
222/*!
223 \fn QByteArray QByteArrayMatcher::pattern() const
224
225 Returns the byte array pattern that this byte array matcher will
226 search for.
227
228 \sa setPattern()
229*/
230
231
232static int findChar(const char *str, int len, char ch, int from)
233{
234 const uchar *s = (const uchar *)str;
235 uchar c = (uchar)ch;
236 if (from < 0)
237 from = qMax(a: from + len, b: 0);
238 if (from < len) {
239 const uchar *n = s + from - 1;
240 const uchar *e = s + len;
241 while (++n != e)
242 if (*n == c)
243 return n - s;
244 }
245 return -1;
246}
247
248/*!
249 \internal
250 */
251static int qFindByteArrayBoyerMoore(
252 const char *haystack, int haystackLen, int haystackOffset,
253 const char *needle, int needleLen)
254{
255 uchar skiptable[256];
256 bm_init_skiptable(cc: (const uchar *)needle, len: needleLen, skiptable);
257 if (haystackOffset < 0)
258 haystackOffset = 0;
259 return bm_find(cc: (const uchar *)haystack, l: haystackLen, index: haystackOffset,
260 puc: (const uchar *)needle, pl: needleLen, skiptable);
261}
262
263#define REHASH(a) \
264 if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \
265 hashHaystack -= uint(a) << sl_minus_1; \
266 hashHaystack <<= 1
267
268/*!
269 \internal
270 */
271int qFindByteArray(
272 const char *haystack0, int haystackLen, int from,
273 const char *needle, int needleLen)
274{
275 const int l = haystackLen;
276 const int sl = needleLen;
277 if (from < 0)
278 from += l;
279 if (uint(sl + from) > (uint)l)
280 return -1;
281 if (!sl)
282 return from;
283 if (!l)
284 return -1;
285
286 if (sl == 1)
287 return findChar(str: haystack0, len: haystackLen, ch: needle[0], from);
288
289 /*
290 We use the Boyer-Moore algorithm in cases where the overhead
291 for the skip table should pay off, otherwise we use a simple
292 hash function.
293 */
294 if (l > 500 && sl > 5)
295 return qFindByteArrayBoyerMoore(haystack: haystack0, haystackLen, haystackOffset: from,
296 needle, needleLen);
297
298 /*
299 We use some hashing for efficiency's sake. Instead of
300 comparing strings, we compare the hash value of str with that
301 of a part of this QString. Only if that matches, we call memcmp().
302 */
303 const char *haystack = haystack0 + from;
304 const char *end = haystack0 + (l - sl);
305 const uint sl_minus_1 = sl - 1;
306 uint hashNeedle = 0, hashHaystack = 0;
307 int idx;
308 for (idx = 0; idx < sl; ++idx) {
309 hashNeedle = ((hashNeedle<<1) + needle[idx]);
310 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
311 }
312 hashHaystack -= *(haystack + sl_minus_1);
313
314 while (haystack <= end) {
315 hashHaystack += *(haystack + sl_minus_1);
316 if (hashHaystack == hashNeedle && *needle == *haystack
317 && memcmp(s1: needle, s2: haystack, n: sl) == 0)
318 return haystack - haystack0;
319
320 REHASH(*haystack);
321 ++haystack;
322 }
323 return -1;
324}
325
326/*!
327 \class QStaticByteArrayMatcherBase
328 \since 5.9
329 \internal
330 \brief Non-template base class of QStaticByteArrayMatcher.
331*/
332
333/*!
334 \class QStaticByteArrayMatcher
335 \since 5.9
336 \inmodule QtCore
337 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
338
339 \ingroup tools
340 \ingroup string-processing
341
342 This class is useful when you have a sequence of bytes that you
343 want to repeatedly match against some byte arrays (perhaps in a
344 loop), or when you want to search for the same sequence of bytes
345 multiple times in the same byte array. Using a matcher object and
346 indexIn() is faster than matching a plain QByteArray with
347 QByteArray::indexOf(), in particular if repeated matching takes place.
348
349 Unlike QByteArrayMatcher, this class calculates the internal
350 representation at \e{compile-time}, if your compiler supports
351 C++14-level \c{constexpr} (C++11 is not sufficient), so it can
352 even benefit if you are doing one-off byte array matches.
353
354 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
355 passing it the C string literal you want to search for. Store the return
356 value of that function in a \c{static const auto} variable, so you don't need
357 to pass the \c{N} template parameter explicitly:
358
359 \snippet code/src_corelib_tools_qbytearraymatcher.cpp 0
360
361 Then call indexIn() on the QByteArray in which you want to search, just like
362 with QByteArrayMatcher.
363
364 Since this class is designed to do all the up-front calculations at compile-time,
365 it does not offer a setPattern() method.
366
367 \note Qt detects the necessary C++14 compiler support by way of the feature
368 test recommendations from
369 \l{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations}
370 {C++ Committee's Standing Document 6}.
371
372 \sa QByteArrayMatcher, QStringMatcher
373*/
374
375/*!
376 \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
377
378 Searches the char string \a haystack, which has length \a hlen, from
379 byte position \a from (default 0, i.e. from the first byte), for
380 the byte array pattern() that was set in the constructor.
381
382 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
383*/
384
385/*!
386 \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
387
388 Searches the char string \a haystack, from byte position \a from
389 (default 0, i.e. from the first byte), for the byte array pattern()
390 that was set in the constructor.
391
392 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
393*/
394
395/*!
396 \fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
397
398 Returns the byte array pattern that this byte array matcher will
399 search for.
400
401 \sa QByteArrayMatcher::setPattern()
402*/
403
404/*!
405 \internal
406*/
407int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
408{
409 if (from < 0)
410 from = 0;
411 return bm_find(cc: reinterpret_cast<const uchar *>(haystack), l: hlen, index: from,
412 puc: reinterpret_cast<const uchar *>(needle), pl: nlen, skiptable: m_skiptable.data);
413}
414
415/*!
416 \fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
417 \internal
418*/
419
420/*!
421 \fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
422 \since 5.9
423 \relates QStaticByteArrayMatcher
424
425 Return a QStaticByteArrayMatcher with the correct \c{N} determined
426 automatically from the \a pattern passed.
427
428 To take full advantage of this function, assign the result to an
429 \c{auto} variable:
430
431 \snippet code/src_corelib_tools_qbytearraymatcher.cpp 1
432*/
433
434
435QT_END_NAMESPACE
436

source code of qtbase/src/corelib/text/qbytearraymatcher.cpp