1 | // Copyright (C) 2022 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qlatin1stringmatcher.h" |
5 | #include <limits.h> |
6 | |
7 | QT_BEGIN_NAMESPACE |
8 | |
9 | /*! \class QLatin1StringMatcher |
10 | \inmodule QtCore |
11 | \brief Optimized search for substring in Latin-1 text. |
12 | |
13 | A QLatin1StringMatcher can search for one QLatin1StringView |
14 | as a substring of another, either ignoring case or taking it into |
15 | account. |
16 | |
17 | \since 6.5 |
18 | \ingroup tools |
19 | \ingroup string-processing |
20 | |
21 | This class is useful when you have a Latin-1 encoded string that |
22 | you want to repeatedly search for in some QLatin1StringViews |
23 | (perhaps in a loop), or when you want to search for all |
24 | instances of it in a given QLatin1StringView. Using a matcher |
25 | object and indexIn() is faster than matching a plain |
26 | QLatin1StringView with QLatin1StringView::indexOf() if repeated |
27 | matching takes place. This class offers no benefit if you are |
28 | doing one-off matches. The string to be searched for must not |
29 | be destroyed or changed before the matcher object is destroyed, |
30 | as the matcher accesses the string when searching for it. |
31 | |
32 | Create a QLatin1StringMatcher for the QLatin1StringView |
33 | you want to search for and the case sensitivity. Then call |
34 | indexIn() with the QLatin1StringView that you want to search |
35 | within. |
36 | |
37 | \sa QLatin1StringView, QStringMatcher, QByteArrayMatcher |
38 | */ |
39 | |
40 | /*! |
41 | Construct an empty Latin-1 string matcher. |
42 | This will match at each position in any string. |
43 | \sa setPattern(), setCaseSensitivity(), indexIn() |
44 | */ |
45 | QLatin1StringMatcher::QLatin1StringMatcher() noexcept |
46 | : m_pattern(), |
47 | m_cs(Qt::CaseSensitive), |
48 | m_caseSensitiveSearcher(m_pattern.data(), m_pattern.data()) |
49 | { |
50 | } |
51 | |
52 | /*! |
53 | Constructs a Latin-1 string matcher that searches for the given \a pattern |
54 | with given case sensitivity \a cs. The \a pattern argument must |
55 | not be destroyed before this matcher object. Call indexIn() |
56 | to find the \a pattern in the given QLatin1StringView. |
57 | */ |
58 | QLatin1StringMatcher::QLatin1StringMatcher(QLatin1StringView pattern, |
59 | Qt::CaseSensitivity cs) noexcept |
60 | : m_pattern(pattern), m_cs(cs) |
61 | { |
62 | setSearcher(); |
63 | } |
64 | |
65 | /*! |
66 | Destroys the Latin-1 string matcher. |
67 | */ |
68 | QLatin1StringMatcher::~QLatin1StringMatcher() noexcept |
69 | { |
70 | freeSearcher(); |
71 | } |
72 | |
73 | /*! |
74 | \internal |
75 | */ |
76 | void QLatin1StringMatcher::setSearcher() noexcept |
77 | { |
78 | if (m_cs == Qt::CaseSensitive) { |
79 | new (&m_caseSensitiveSearcher) CaseSensitiveSearcher(m_pattern.data(), m_pattern.end()); |
80 | } else { |
81 | QtPrivate::QCaseInsensitiveLatin1Hash foldCase; |
82 | qsizetype bufferSize = std::min(a: m_pattern.size(), b: qsizetype(sizeof m_foldBuffer)); |
83 | for (qsizetype i = 0; i < bufferSize; ++i) |
84 | m_foldBuffer[i] = static_cast<char>(foldCase(m_pattern[i].toLatin1())); |
85 | |
86 | new (&m_caseInsensitiveSearcher) |
87 | CaseInsensitiveSearcher(m_foldBuffer, &m_foldBuffer[bufferSize]); |
88 | } |
89 | } |
90 | |
91 | /*! |
92 | \internal |
93 | */ |
94 | void QLatin1StringMatcher::freeSearcher() noexcept |
95 | { |
96 | if (m_cs == Qt::CaseSensitive) |
97 | m_caseSensitiveSearcher.~CaseSensitiveSearcher(); |
98 | else |
99 | m_caseInsensitiveSearcher.~CaseInsensitiveSearcher(); |
100 | } |
101 | |
102 | /*! |
103 | Sets the \a pattern to search for. The string pointed to by the |
104 | QLatin1StringView must not be destroyed before the matcher is |
105 | destroyed, unless it is set to point to a different \a pattern |
106 | with longer lifetime first. |
107 | |
108 | \sa pattern(), indexIn() |
109 | */ |
110 | void QLatin1StringMatcher::setPattern(QLatin1StringView pattern) noexcept |
111 | { |
112 | if (m_pattern.latin1() == pattern.latin1() && m_pattern.size() == pattern.size()) |
113 | return; // Same address and size |
114 | |
115 | freeSearcher(); |
116 | m_pattern = pattern; |
117 | setSearcher(); |
118 | } |
119 | |
120 | /*! |
121 | Returns the Latin-1 pattern that the matcher searches for. |
122 | |
123 | \sa setPattern(), indexIn() |
124 | */ |
125 | QLatin1StringView QLatin1StringMatcher::pattern() const noexcept |
126 | { |
127 | return m_pattern; |
128 | } |
129 | |
130 | /*! |
131 | Sets the case sensitivity to \a cs. |
132 | |
133 | \sa caseSensitivity(), indexIn() |
134 | */ |
135 | void QLatin1StringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) noexcept |
136 | { |
137 | if (m_cs == cs) |
138 | return; |
139 | |
140 | freeSearcher(); |
141 | m_cs = cs; |
142 | setSearcher(); |
143 | } |
144 | |
145 | /*! |
146 | Returns the case sensitivity the matcher uses. |
147 | |
148 | \sa setCaseSensitivity(), indexIn() |
149 | */ |
150 | Qt::CaseSensitivity QLatin1StringMatcher::caseSensitivity() const noexcept |
151 | { |
152 | return m_cs; |
153 | } |
154 | |
155 | /*! |
156 | Searches for the pattern in the given \a haystack starting from |
157 | \a from. |
158 | |
159 | \sa caseSensitivity(), pattern() |
160 | */ |
161 | qsizetype QLatin1StringMatcher::indexIn(QLatin1StringView haystack, qsizetype from) const noexcept |
162 | { |
163 | if (m_pattern.isEmpty() && from == haystack.size()) |
164 | return from; |
165 | if (from >= haystack.size()) |
166 | return -1; |
167 | auto begin = haystack.begin() + from; |
168 | auto end = haystack.end(); |
169 | auto found = begin; |
170 | if (m_cs == Qt::CaseSensitive) { |
171 | found = m_caseSensitiveSearcher(begin, end, m_pattern.begin(), m_pattern.end()).begin; |
172 | if (found == end) |
173 | return -1; |
174 | } else { |
175 | const qsizetype bufferSize = std::min(a: m_pattern.size(), b: qsizetype(sizeof m_foldBuffer)); |
176 | const QLatin1StringView restNeedle = m_pattern.sliced(pos: bufferSize); |
177 | const bool needleLongerThanBuffer = restNeedle.size() > 0; |
178 | QLatin1StringView restHaystack = haystack; |
179 | do { |
180 | found = m_caseInsensitiveSearcher(found, end, m_foldBuffer, &m_foldBuffer[bufferSize]) |
181 | .begin; |
182 | if (found == end) { |
183 | return -1; |
184 | } else if (!needleLongerThanBuffer) { |
185 | break; |
186 | } |
187 | restHaystack = haystack.sliced( |
188 | pos: qMin(a: haystack.size(), |
189 | b: bufferSize + qsizetype(std::distance(first: haystack.begin(), last: found)))); |
190 | if (restHaystack.startsWith(s: restNeedle, cs: Qt::CaseInsensitive)) |
191 | break; |
192 | ++found; |
193 | } while (true); |
194 | } |
195 | return std::distance(first: haystack.begin(), last: found); |
196 | } |
197 | |
198 | QT_END_NAMESPACE |
199 | |