1 | // Copyright (C) 2020 The Qt Company Ltd. |
2 | // Copyright (C) 2019 Mail.ru Group. |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | #include "qstringmatcher.h" |
6 | |
7 | QT_BEGIN_NAMESPACE |
8 | |
9 | static constexpr qsizetype FoldBufferCapacity = 256; |
10 | |
11 | static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs) |
12 | { |
13 | const char16_t *uc = needle.utf16(); |
14 | const qsizetype len = |
15 | cs == Qt::CaseSensitive ? needle.size() : qMin(a: needle.size(), b: FoldBufferCapacity); |
16 | int l = qMin(a: int(len), b: 255); |
17 | memset(s: skiptable, c: l, n: 256 * sizeof(uchar)); |
18 | uc += len - l; |
19 | if (cs == Qt::CaseSensitive) { |
20 | while (l--) { |
21 | skiptable[*uc & 0xff] = l; |
22 | ++uc; |
23 | } |
24 | } else { |
25 | const char16_t *start = uc; |
26 | while (l--) { |
27 | skiptable[foldCase(ch: uc, start) & 0xff] = l; |
28 | ++uc; |
29 | } |
30 | } |
31 | } |
32 | |
33 | static inline qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle, |
34 | const uchar *skiptable, Qt::CaseSensitivity cs) |
35 | { |
36 | const char16_t *uc = haystack.utf16(); |
37 | const qsizetype l = haystack.size(); |
38 | const char16_t *puc = needle.utf16(); |
39 | const qsizetype pl = needle.size(); |
40 | |
41 | if (pl == 0) |
42 | return index > l ? -1 : index; |
43 | |
44 | if (cs == Qt::CaseSensitive) { |
45 | const qsizetype pl_minus_one = pl - 1; |
46 | const char16_t *current = uc + index + pl_minus_one; |
47 | const char16_t *end = uc + l; |
48 | |
49 | while (current < end) { |
50 | qsizetype skip = skiptable[*current & 0xff]; |
51 | if (!skip) { |
52 | // possible match |
53 | while (skip < pl) { |
54 | if (*(current - skip) != puc[pl_minus_one-skip]) |
55 | break; |
56 | ++skip; |
57 | } |
58 | if (skip > pl_minus_one) // we have a match |
59 | return (current - uc) - pl_minus_one; |
60 | |
61 | // in case we don't have a match we are a bit inefficient as we only skip by one |
62 | // when we have the non matching char in the string. |
63 | if (skiptable[*(current - skip) & 0xff] == pl) |
64 | skip = pl - skip; |
65 | else |
66 | skip = 1; |
67 | } |
68 | if (current > end - skip) |
69 | break; |
70 | current += skip; |
71 | } |
72 | } else { |
73 | char16_t foldBuffer[FoldBufferCapacity]; |
74 | const qsizetype foldBufferLength = qMin(a: FoldBufferCapacity, b: pl); |
75 | const char16_t *start = puc; |
76 | for (qsizetype i = 0; i < foldBufferLength; ++i) |
77 | foldBuffer[i] = foldCase(ch: &puc[i], start); |
78 | QStringView restNeedle = needle.sliced(pos: foldBufferLength); |
79 | const qsizetype foldBufferEnd = foldBufferLength - 1; |
80 | const char16_t *current = uc + index + foldBufferEnd; |
81 | const char16_t *end = uc + l; |
82 | |
83 | while (current < end) { |
84 | qsizetype skip = skiptable[foldCase(ch: current, start: uc) & 0xff]; |
85 | if (!skip) { |
86 | // possible match |
87 | while (skip < foldBufferLength) { |
88 | if (foldCase(ch: current - skip, start: uc) != foldBuffer[foldBufferEnd - skip]) |
89 | break; |
90 | ++skip; |
91 | } |
92 | if (skip > foldBufferEnd) { // Matching foldBuffer |
93 | qsizetype candidatePos = (current - uc) - foldBufferEnd; |
94 | QStringView restHaystack = |
95 | haystack.sliced(pos: qMin(a: haystack.size(), b: candidatePos + foldBufferLength)); |
96 | if (restNeedle.size() == 0 |
97 | || restHaystack.startsWith( |
98 | s: restNeedle, cs: Qt::CaseInsensitive)) // Check the rest of the string |
99 | return candidatePos; |
100 | } |
101 | // in case we don't have a match we are a bit inefficient as we only skip by one |
102 | // when we have the non matching char in the string. |
103 | if (skiptable[foldCase(ch: current - skip, start: uc) & 0xff] == foldBufferLength) |
104 | skip = foldBufferLength - skip; |
105 | else |
106 | skip = 1; |
107 | } |
108 | if (current > end - skip) |
109 | break; |
110 | current += skip; |
111 | } |
112 | } |
113 | return -1; // not found |
114 | } |
115 | |
116 | void QStringMatcher::updateSkipTable() |
117 | { |
118 | bm_init_skiptable(needle: q_sv, skiptable: q_skiptable, cs: q_cs); |
119 | } |
120 | |
121 | /*! |
122 | \class QStringMatcher |
123 | \inmodule QtCore |
124 | \brief The QStringMatcher class holds a sequence of characters that |
125 | can be quickly matched in a Unicode string. |
126 | |
127 | \ingroup tools |
128 | \ingroup string-processing |
129 | |
130 | This class is useful when you have a sequence of \l{QChar}s that |
131 | you want to repeatedly match against some strings (perhaps in a |
132 | loop), or when you want to search for the same sequence of |
133 | characters multiple times in the same string. Using a matcher |
134 | object and indexIn() is faster than matching a plain QString with |
135 | QString::indexOf() if repeated matching takes place. This class |
136 | offers no benefit if you are doing one-off string matches. |
137 | |
138 | Create the QStringMatcher with the QString you want to search |
139 | for. Then call indexIn() on the QString that you want to search. |
140 | |
141 | \sa QString, QByteArrayMatcher, QRegularExpression |
142 | */ |
143 | |
144 | /*! \fn QStringMatcher::QStringMatcher() |
145 | |
146 | Constructs an empty string matcher that won't match anything. |
147 | Call setPattern() to give it a pattern to match. |
148 | */ |
149 | |
150 | /*! |
151 | Constructs a string matcher that will search for \a pattern, with |
152 | case sensitivity \a cs. |
153 | |
154 | Call indexIn() to perform a search. |
155 | */ |
156 | QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs) |
157 | : d_ptr(nullptr), q_cs(cs), q_pattern(pattern) |
158 | { |
159 | q_sv = q_pattern; |
160 | updateSkipTable(); |
161 | } |
162 | |
163 | /*! |
164 | \fn QStringMatcher::QStringMatcher(const QChar *uc, qsizetype length, Qt::CaseSensitivity cs) |
165 | \since 4.5 |
166 | |
167 | Constructs a string matcher that will search for the pattern referred to |
168 | by \a uc with the given \a length and case sensitivity specified by \a cs. |
169 | */ |
170 | |
171 | /*! |
172 | \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive) |
173 | \since 5.14 |
174 | |
175 | Constructs a string matcher that will search for \a pattern, with |
176 | case sensitivity \a cs. |
177 | |
178 | Call indexIn() to perform a search. |
179 | */ |
180 | QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs) |
181 | : d_ptr(nullptr), q_cs(cs), q_sv(str) |
182 | { |
183 | updateSkipTable(); |
184 | } |
185 | /*! |
186 | Copies the \a other string matcher to this string matcher. |
187 | */ |
188 | QStringMatcher::QStringMatcher(const QStringMatcher &other) |
189 | : d_ptr(nullptr) |
190 | { |
191 | operator=(other); |
192 | } |
193 | |
194 | /*! |
195 | Destroys the string matcher. |
196 | */ |
197 | QStringMatcher::~QStringMatcher() |
198 | { |
199 | Q_UNUSED(d_ptr); |
200 | } |
201 | |
202 | /*! |
203 | Assigns the \a other string matcher to this string matcher. |
204 | */ |
205 | QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other) |
206 | { |
207 | if (this != &other) { |
208 | q_pattern = other.q_pattern; |
209 | q_cs = other.q_cs; |
210 | q_sv = other.q_sv; |
211 | memcpy(dest: q_skiptable, src: other.q_skiptable, n: sizeof(q_skiptable)); |
212 | } |
213 | return *this; |
214 | } |
215 | |
216 | /*! |
217 | Sets the string that this string matcher will search for to \a |
218 | pattern. |
219 | |
220 | \sa pattern(), setCaseSensitivity(), indexIn() |
221 | */ |
222 | void QStringMatcher::setPattern(const QString &pattern) |
223 | { |
224 | q_pattern = pattern; |
225 | q_sv = q_pattern; |
226 | updateSkipTable(); |
227 | } |
228 | |
229 | /*! |
230 | \fn QString QStringMatcher::pattern() const |
231 | |
232 | Returns the string pattern that this string matcher will search |
233 | for. |
234 | |
235 | \sa setPattern() |
236 | */ |
237 | |
238 | QString QStringMatcher::pattern() const |
239 | { |
240 | if (!q_pattern.isEmpty()) |
241 | return q_pattern; |
242 | return q_sv.toString(); |
243 | } |
244 | |
245 | /*! |
246 | Sets the case sensitivity setting of this string matcher to \a |
247 | cs. |
248 | |
249 | \sa caseSensitivity(), setPattern(), indexIn() |
250 | */ |
251 | void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) |
252 | { |
253 | if (cs == q_cs) |
254 | return; |
255 | q_cs = cs; |
256 | updateSkipTable(); |
257 | } |
258 | |
259 | /*! \fn qsizetype QStringMatcher::indexIn(const QString &str, qsizetype from) const |
260 | |
261 | Searches the string \a str from character position \a from |
262 | (default 0, i.e. from the first character), for the string |
263 | pattern() that was set in the constructor or in the most recent |
264 | call to setPattern(). Returns the position where the pattern() |
265 | matched in \a str, or -1 if no match was found. |
266 | |
267 | \sa setPattern(), setCaseSensitivity() |
268 | */ |
269 | |
270 | /*! \fn qsizetype QStringMatcher::indexIn(const QChar *str, qsizetype length, qsizetype from) const |
271 | \since 4.5 |
272 | |
273 | Searches the string starting at \a str (of length \a length) from |
274 | character position \a from (default 0, i.e. from the first |
275 | character), for the string pattern() that was set in the |
276 | constructor or in the most recent call to setPattern(). Returns |
277 | the position where the pattern() matched in \a str, or -1 if no |
278 | match was found. |
279 | |
280 | \sa setPattern(), setCaseSensitivity() |
281 | */ |
282 | |
283 | /*! |
284 | \since 5.14 |
285 | |
286 | Searches the string \a str from character position \a from |
287 | (default 0, i.e. from the first character), for the string |
288 | pattern() that was set in the constructor or in the most recent |
289 | call to setPattern(). Returns the position where the pattern() |
290 | matched in \a str, or -1 if no match was found. |
291 | |
292 | \sa setPattern(), setCaseSensitivity() |
293 | */ |
294 | qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const |
295 | { |
296 | if (from < 0) |
297 | from = 0; |
298 | return bm_find(haystack: str, index: from, needle: q_sv, skiptable: q_skiptable, cs: q_cs); |
299 | } |
300 | |
301 | /*! |
302 | \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const |
303 | |
304 | Returns the case sensitivity setting for this string matcher. |
305 | |
306 | \sa setCaseSensitivity() |
307 | */ |
308 | |
309 | /*! |
310 | \internal |
311 | */ |
312 | |
313 | qsizetype qFindStringBoyerMoore( |
314 | QStringView haystack, qsizetype haystackOffset, |
315 | QStringView needle, Qt::CaseSensitivity cs) |
316 | { |
317 | uchar skiptable[256]; |
318 | bm_init_skiptable(needle, skiptable, cs); |
319 | if (haystackOffset < 0) |
320 | haystackOffset = 0; |
321 | return bm_find(haystack, index: haystackOffset, needle, skiptable, cs); |
322 | } |
323 | |
324 | QT_END_NAMESPACE |
325 | |