1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2019 Mail.ru Group. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #include "qstringmatcher.h" |
42 | |
43 | QT_BEGIN_NAMESPACE |
44 | |
45 | static void bm_init_skiptable(const ushort *uc, qsizetype len, uchar *skiptable, Qt::CaseSensitivity cs) |
46 | { |
47 | int l = int(qMin(a: len, b: qsizetype(255))); |
48 | memset(s: skiptable, c: l, n: 256 * sizeof(uchar)); |
49 | uc += len - l; |
50 | if (cs == Qt::CaseSensitive) { |
51 | while (l--) { |
52 | skiptable[*uc & 0xff] = l; |
53 | ++uc; |
54 | } |
55 | } else { |
56 | const ushort *start = uc; |
57 | while (l--) { |
58 | skiptable[foldCase(ch: uc, start) & 0xff] = l; |
59 | ++uc; |
60 | } |
61 | } |
62 | } |
63 | |
64 | static inline qsizetype bm_find(const ushort *uc, qsizetype l, qsizetype index, const ushort *puc, qsizetype pl, |
65 | const uchar *skiptable, Qt::CaseSensitivity cs) |
66 | { |
67 | if (pl == 0) |
68 | return index > l ? -1 : index; |
69 | const qsizetype pl_minus_one = pl - 1; |
70 | |
71 | const ushort *current = uc + index + pl_minus_one; |
72 | const ushort *end = uc + l; |
73 | if (cs == Qt::CaseSensitive) { |
74 | while (current < end) { |
75 | qsizetype skip = skiptable[*current & 0xff]; |
76 | if (!skip) { |
77 | // possible match |
78 | while (skip < pl) { |
79 | if (*(current - skip) != puc[pl_minus_one-skip]) |
80 | break; |
81 | ++skip; |
82 | } |
83 | if (skip > pl_minus_one) // we have a match |
84 | return (current - uc) - pl_minus_one; |
85 | |
86 | // in case we don't have a match we are a bit inefficient as we only skip by one |
87 | // when we have the non matching char in the string. |
88 | if (skiptable[*(current - skip) & 0xff] == pl) |
89 | skip = pl - skip; |
90 | else |
91 | skip = 1; |
92 | } |
93 | if (current > end - skip) |
94 | break; |
95 | current += skip; |
96 | } |
97 | } else { |
98 | while (current < end) { |
99 | qsizetype skip = skiptable[foldCase(ch: current, start: uc) & 0xff]; |
100 | if (!skip) { |
101 | // possible match |
102 | while (skip < pl) { |
103 | if (foldCase(ch: current - skip, start: uc) != foldCase(ch: puc + pl_minus_one - skip, start: puc)) |
104 | break; |
105 | ++skip; |
106 | } |
107 | if (skip > pl_minus_one) // we have a match |
108 | return (current - uc) - pl_minus_one; |
109 | // in case we don't have a match we are a bit inefficient as we only skip by one |
110 | // when we have the non matching char in the string. |
111 | if (skiptable[foldCase(ch: current - skip, start: uc) & 0xff] == pl) |
112 | skip = pl - skip; |
113 | else |
114 | skip = 1; |
115 | } |
116 | if (current > end - skip) |
117 | break; |
118 | current += skip; |
119 | } |
120 | } |
121 | return -1; // not found |
122 | } |
123 | |
124 | /*! |
125 | \class QStringMatcher |
126 | \inmodule QtCore |
127 | \brief The QStringMatcher class holds a sequence of characters that |
128 | can be quickly matched in a Unicode string. |
129 | |
130 | \ingroup tools |
131 | \ingroup string-processing |
132 | |
133 | This class is useful when you have a sequence of \l{QChar}s that |
134 | you want to repeatedly match against some strings (perhaps in a |
135 | loop), or when you want to search for the same sequence of |
136 | characters multiple times in the same string. Using a matcher |
137 | object and indexIn() is faster than matching a plain QString with |
138 | QString::indexOf() if repeated matching takes place. This class |
139 | offers no benefit if you are doing one-off string matches. |
140 | |
141 | Create the QStringMatcher with the QString you want to search |
142 | for. Then call indexIn() on the QString that you want to search. |
143 | |
144 | \sa QString, QByteArrayMatcher, QRegExp |
145 | */ |
146 | |
147 | /*! |
148 | Constructs an empty string matcher that won't match anything. |
149 | Call setPattern() to give it a pattern to match. |
150 | */ |
151 | QStringMatcher::QStringMatcher() |
152 | : d_ptr(nullptr), q_cs(Qt::CaseSensitive) |
153 | { |
154 | memset(s: q_data, c: 0, n: sizeof(q_data)); |
155 | } |
156 | |
157 | /*! |
158 | Constructs a string matcher that will search for \a pattern, with |
159 | case sensitivity \a cs. |
160 | |
161 | Call indexIn() to perform a search. |
162 | */ |
163 | QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs) |
164 | : d_ptr(nullptr), q_pattern(pattern), q_cs(cs) |
165 | { |
166 | p.uc = pattern.unicode(); |
167 | p.len = pattern.size(); |
168 | bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs); |
169 | } |
170 | |
171 | /*! |
172 | \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs) |
173 | \since 4.5 |
174 | |
175 | Constructs a string matcher that will search for the pattern referred to |
176 | by \a uc with the given \a length and case sensitivity specified by \a cs. |
177 | */ |
178 | QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs) |
179 | : QStringMatcher(QStringView(uc, len), cs) |
180 | { |
181 | } |
182 | |
183 | /*! |
184 | \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive) |
185 | \since 5.14 |
186 | |
187 | Constructs a string matcher that will search for \a pattern, with |
188 | case sensitivity \a cs. |
189 | |
190 | Call indexIn() to perform a search. |
191 | */ |
192 | QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs) |
193 | : d_ptr(nullptr), q_cs(cs) |
194 | { |
195 | p.uc = str.data(); |
196 | p.len = int(str.size()); |
197 | bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs); |
198 | } |
199 | /*! |
200 | Copies the \a other string matcher to this string matcher. |
201 | */ |
202 | QStringMatcher::QStringMatcher(const QStringMatcher &other) |
203 | : d_ptr(nullptr) |
204 | { |
205 | operator=(other); |
206 | } |
207 | |
208 | /*! |
209 | Destroys the string matcher. |
210 | */ |
211 | QStringMatcher::~QStringMatcher() |
212 | { |
213 | Q_UNUSED(d_ptr); |
214 | } |
215 | |
216 | /*! |
217 | Assigns the \a other string matcher to this string matcher. |
218 | */ |
219 | QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other) |
220 | { |
221 | if (this != &other) { |
222 | q_pattern = other.q_pattern; |
223 | q_cs = other.q_cs; |
224 | memcpy(dest: q_data, src: other.q_data, n: sizeof(q_data)); |
225 | } |
226 | return *this; |
227 | } |
228 | |
229 | /*! |
230 | Sets the string that this string matcher will search for to \a |
231 | pattern. |
232 | |
233 | \sa pattern(), setCaseSensitivity(), indexIn() |
234 | */ |
235 | void QStringMatcher::setPattern(const QString &pattern) |
236 | { |
237 | q_pattern = pattern; |
238 | p.uc = pattern.unicode(); |
239 | p.len = pattern.size(); |
240 | bm_init_skiptable(uc: (const ushort *)pattern.unicode(), len: pattern.size(), skiptable: p.q_skiptable, cs: q_cs); |
241 | } |
242 | |
243 | /*! |
244 | \fn QString QStringMatcher::pattern() const |
245 | |
246 | Returns the string pattern that this string matcher will search |
247 | for. |
248 | |
249 | \sa setPattern() |
250 | */ |
251 | |
252 | QString QStringMatcher::pattern() const |
253 | { |
254 | if (!q_pattern.isEmpty()) |
255 | return q_pattern; |
256 | return QString(p.uc, p.len); |
257 | } |
258 | |
259 | /*! |
260 | Sets the case sensitivity setting of this string matcher to \a |
261 | cs. |
262 | |
263 | \sa caseSensitivity(), setPattern(), indexIn() |
264 | */ |
265 | void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) |
266 | { |
267 | if (cs == q_cs) |
268 | return; |
269 | bm_init_skiptable(uc: (const ushort *)p.uc, len: p.len, skiptable: p.q_skiptable, cs); |
270 | q_cs = cs; |
271 | } |
272 | |
273 | /*! |
274 | Searches the string \a str from character position \a from |
275 | (default 0, i.e. from the first character), for the string |
276 | pattern() that was set in the constructor or in the most recent |
277 | call to setPattern(). Returns the position where the pattern() |
278 | matched in \a str, or -1 if no match was found. |
279 | |
280 | \sa setPattern(), setCaseSensitivity() |
281 | */ |
282 | int QStringMatcher::indexIn(const QString &str, int from) const |
283 | { |
284 | return int(indexIn(str: QStringView(str), from)); |
285 | } |
286 | |
287 | /*! |
288 | \since 4.5 |
289 | |
290 | Searches the string starting at \a str (of length \a length) from |
291 | character position \a from (default 0, i.e. from the first |
292 | character), for the string pattern() that was set in the |
293 | constructor or in the most recent call to setPattern(). Returns |
294 | the position where the pattern() matched in \a str, or -1 if no |
295 | match was found. |
296 | |
297 | \sa setPattern(), setCaseSensitivity() |
298 | */ |
299 | int QStringMatcher::indexIn(const QChar *str, int length, int from) const |
300 | { |
301 | return int(indexIn(str: QStringView(str, length), from)); |
302 | } |
303 | |
304 | /*! |
305 | \since 5.14 |
306 | |
307 | Searches the string \a str from character position \a from |
308 | (default 0, i.e. from the first character), for the string |
309 | pattern() that was set in the constructor or in the most recent |
310 | call to setPattern(). Returns the position where the pattern() |
311 | matched in \a str, or -1 if no match was found. |
312 | |
313 | \sa setPattern(), setCaseSensitivity() |
314 | */ |
315 | qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const |
316 | { |
317 | if (from < 0) |
318 | from = 0; |
319 | return bm_find(uc: (const ushort *)str.data(), l: str.size(), index: from, |
320 | puc: (const ushort *)p.uc, pl: p.len, |
321 | skiptable: p.q_skiptable, cs: q_cs); |
322 | } |
323 | |
324 | /*! |
325 | \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const |
326 | |
327 | Returns the case sensitivity setting for this string matcher. |
328 | |
329 | \sa setCaseSensitivity() |
330 | */ |
331 | |
332 | /*! |
333 | \internal |
334 | */ |
335 | |
336 | qsizetype qFindStringBoyerMoore( |
337 | QStringView haystack, qsizetype haystackOffset, |
338 | QStringView needle, Qt::CaseSensitivity cs) |
339 | { |
340 | uchar skiptable[256]; |
341 | bm_init_skiptable(uc: (const ushort *)needle.data(), len: needle.size(), skiptable, cs); |
342 | if (haystackOffset < 0) |
343 | haystackOffset = 0; |
344 | return bm_find(uc: (const ushort *)haystack.data(), l: haystack.size(), index: haystackOffset, |
345 | puc: (const ushort *)needle.data(), pl: needle.size(), skiptable, cs); |
346 | } |
347 | |
348 | QT_END_NAMESPACE |
349 | |