1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2019 Mail.ru Group.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#include "qstringmatcher.h"
42
43QT_BEGIN_NAMESPACE
44
45static void bm_init_skiptable(const ushort *uc, qsizetype len, uchar *skiptable, Qt::CaseSensitivity cs)
46{
47 int l = int(qMin(a: len, b: qsizetype(255)));
48 memset(s: skiptable, c: l, n: 256 * sizeof(uchar));
49 uc += len - l;
50 if (cs == Qt::CaseSensitive) {
51 while (l--) {
52 skiptable[*uc & 0xff] = l;
53 ++uc;
54 }
55 } else {
56 const ushort *start = uc;
57 while (l--) {
58 skiptable[foldCase(ch: uc, start) & 0xff] = l;
59 ++uc;
60 }
61 }
62}
63
64static inline qsizetype bm_find(const ushort *uc, qsizetype l, qsizetype index, const ushort *puc, qsizetype pl,
65 const uchar *skiptable, Qt::CaseSensitivity cs)
66{
67 if (pl == 0)
68 return index > l ? -1 : index;
69 const qsizetype pl_minus_one = pl - 1;
70
71 const ushort *current = uc + index + pl_minus_one;
72 const ushort *end = uc + l;
73 if (cs == Qt::CaseSensitive) {
74 while (current < end) {
75 qsizetype skip = skiptable[*current & 0xff];
76 if (!skip) {
77 // possible match
78 while (skip < pl) {
79 if (*(current - skip) != puc[pl_minus_one-skip])
80 break;
81 ++skip;
82 }
83 if (skip > pl_minus_one) // we have a match
84 return (current - uc) - pl_minus_one;
85
86 // in case we don't have a match we are a bit inefficient as we only skip by one
87 // when we have the non matching char in the string.
88 if (skiptable[*(current - skip) & 0xff] == pl)
89 skip = pl - skip;
90 else
91 skip = 1;
92 }
93 if (current > end - skip)
94 break;
95 current += skip;
96 }
97 } else {
98 while (current < end) {
99 qsizetype skip = skiptable[foldCase(ch: current, start: uc) & 0xff];
100 if (!skip) {
101 // possible match
102 while (skip < pl) {
103 if (foldCase(ch: current - skip, start: uc) != foldCase(ch: puc + pl_minus_one - skip, start: puc))
104 break;
105 ++skip;
106 }
107 if (skip > pl_minus_one) // we have a match
108 return (current - uc) - pl_minus_one;
109 // in case we don't have a match we are a bit inefficient as we only skip by one
110 // when we have the non matching char in the string.
111 if (skiptable[foldCase(ch: current - skip, start: uc) & 0xff] == pl)
112 skip = pl - skip;
113 else
114 skip = 1;
115 }
116 if (current > end - skip)
117 break;
118 current += skip;
119 }
120 }
121 return -1; // not found
122}
123
124/*!
125 \class QStringMatcher
126 \inmodule QtCore
127 \brief The QStringMatcher class holds a sequence of characters that
128 can be quickly matched in a Unicode string.
129
130 \ingroup tools
131 \ingroup string-processing
132
133 This class is useful when you have a sequence of \l{QChar}s that
134 you want to repeatedly match against some strings (perhaps in a
135 loop), or when you want to search for the same sequence of
136 characters multiple times in the same string. Using a matcher
137 object and indexIn() is faster than matching a plain QString with
138 QString::indexOf() if repeated matching takes place. This class
139 offers no benefit if you are doing one-off string matches.
140
141 Create the QStringMatcher with the QString you want to search
142 for. Then call indexIn() on the QString that you want to search.
143
144 \sa QString, QByteArrayMatcher, QRegExp
145*/
146
147/*!
148 Constructs an empty string matcher that won't match anything.
149 Call setPattern() to give it a pattern to match.
150*/
151QStringMatcher::QStringMatcher()
152 : d_ptr(nullptr), q_cs(Qt::CaseSensitive)
153{
154 memset(s: q_data, c: 0, n: sizeof(q_data));
155}
156
157/*!
158 Constructs a string matcher that will search for \a pattern, with
159 case sensitivity \a cs.
160
161 Call indexIn() to perform a search.
162*/
163QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
164 : d_ptr(nullptr), q_pattern(pattern), q_cs(cs)
165{
166 p.uc = pattern.unicode();
167 p.len = pattern.size();
168 bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs);
169}
170
171/*!
172 \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
173 \since 4.5
174
175 Constructs a string matcher that will search for the pattern referred to
176 by \a uc with the given \a length and case sensitivity specified by \a cs.
177*/
178QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
179 : QStringMatcher(QStringView(uc, len), cs)
180{
181}
182
183/*!
184 \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive)
185 \since 5.14
186
187 Constructs a string matcher that will search for \a pattern, with
188 case sensitivity \a cs.
189
190 Call indexIn() to perform a search.
191*/
192QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs)
193 : d_ptr(nullptr), q_cs(cs)
194{
195 p.uc = str.data();
196 p.len = int(str.size());
197 bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs);
198}
199/*!
200 Copies the \a other string matcher to this string matcher.
201*/
202QStringMatcher::QStringMatcher(const QStringMatcher &other)
203 : d_ptr(nullptr)
204{
205 operator=(other);
206}
207
208/*!
209 Destroys the string matcher.
210*/
211QStringMatcher::~QStringMatcher()
212{
213 Q_UNUSED(d_ptr);
214}
215
216/*!
217 Assigns the \a other string matcher to this string matcher.
218*/
219QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
220{
221 if (this != &other) {
222 q_pattern = other.q_pattern;
223 q_cs = other.q_cs;
224 memcpy(dest: q_data, src: other.q_data, n: sizeof(q_data));
225 }
226 return *this;
227}
228
229/*!
230 Sets the string that this string matcher will search for to \a
231 pattern.
232
233 \sa pattern(), setCaseSensitivity(), indexIn()
234*/
235void QStringMatcher::setPattern(const QString &pattern)
236{
237 q_pattern = pattern;
238 p.uc = pattern.unicode();
239 p.len = pattern.size();
240 bm_init_skiptable(uc: (const ushort *)pattern.unicode(), len: pattern.size(), skiptable: p.q_skiptable, cs: q_cs);
241}
242
243/*!
244 \fn QString QStringMatcher::pattern() const
245
246 Returns the string pattern that this string matcher will search
247 for.
248
249 \sa setPattern()
250*/
251
252QString QStringMatcher::pattern() const
253{
254 if (!q_pattern.isEmpty())
255 return q_pattern;
256 return QString(p.uc, p.len);
257}
258
259/*!
260 Sets the case sensitivity setting of this string matcher to \a
261 cs.
262
263 \sa caseSensitivity(), setPattern(), indexIn()
264*/
265void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
266{
267 if (cs == q_cs)
268 return;
269 bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs);
270 q_cs = cs;
271}
272
273/*!
274 Searches the string \a str from character position \a from
275 (default 0, i.e. from the first character), for the string
276 pattern() that was set in the constructor or in the most recent
277 call to setPattern(). Returns the position where the pattern()
278 matched in \a str, or -1 if no match was found.
279
280 \sa setPattern(), setCaseSensitivity()
281*/
282int QStringMatcher::indexIn(const QString &str, int from) const
283{
284 return int(indexIn(str: QStringView(str), from));
285}
286
287/*!
288 \since 4.5
289
290 Searches the string starting at \a str (of length \a length) from
291 character position \a from (default 0, i.e. from the first
292 character), for the string pattern() that was set in the
293 constructor or in the most recent call to setPattern(). Returns
294 the position where the pattern() matched in \a str, or -1 if no
295 match was found.
296
297 \sa setPattern(), setCaseSensitivity()
298*/
299int QStringMatcher::indexIn(const QChar *str, int length, int from) const
300{
301 return int(indexIn(str: QStringView(str, length), from));
302}
303
304/*!
305 \since 5.14
306
307 Searches the string \a str from character position \a from
308 (default 0, i.e. from the first character), for the string
309 pattern() that was set in the constructor or in the most recent
310 call to setPattern(). Returns the position where the pattern()
311 matched in \a str, or -1 if no match was found.
312
313 \sa setPattern(), setCaseSensitivity()
314*/
315qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const
316{
317 if (from < 0)
318 from = 0;
319 return bm_find(uc: (const ushort *)str.data(), l: str.size(), index: from,
320 puc: (const ushort *)p.uc, pl: p.len,
321 skiptable: p.q_skiptable, cs: q_cs);
322}
323
324/*!
325 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
326
327 Returns the case sensitivity setting for this string matcher.
328
329 \sa setCaseSensitivity()
330*/
331
332/*!
333 \internal
334*/
335
336qsizetype qFindStringBoyerMoore(
337 QStringView haystack, qsizetype haystackOffset,
338 QStringView needle, Qt::CaseSensitivity cs)
339{
340 uchar skiptable[256];
341 bm_init_skiptable(uc: (const ushort *)needle.data(), len: needle.size(), skiptable, cs);
342 if (haystackOffset < 0)
343 haystackOffset = 0;
344 return bm_find(uc: (const ushort *)haystack.data(), l: haystack.size(), index: haystackOffset,
345 puc: (const ushort *)needle.data(), pl: needle.size(), skiptable, cs);
346}
347
348QT_END_NAMESPACE
349

source code of qtbase/src/corelib/text/qstringmatcher.cpp