1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qlatin1stringmatcher.h"
5#include <limits.h>
6
7QT_BEGIN_NAMESPACE
8
9/*! \class QLatin1StringMatcher
10 \inmodule QtCore
11 \brief Optimized search for substring in Latin-1 text.
12
13 A QLatin1StringMatcher can search for one QLatin1StringView
14 as a substring of another, either ignoring case or taking it into
15 account.
16
17 \since 6.5
18 \ingroup tools
19 \ingroup string-processing
20
21 This class is useful when you have a Latin-1 encoded string that
22 you want to repeatedly search for in some QLatin1StringViews
23 (perhaps in a loop), or when you want to search for all
24 instances of it in a given QLatin1StringView. Using a matcher
25 object and indexIn() is faster than matching a plain
26 QLatin1StringView with QLatin1StringView::indexOf() if repeated
27 matching takes place. This class offers no benefit if you are
28 doing one-off matches. The string to be searched for must not
29 be destroyed or changed before the matcher object is destroyed,
30 as the matcher accesses the string when searching for it.
31
32 Create a QLatin1StringMatcher for the QLatin1StringView
33 you want to search for and the case sensitivity. Then call
34 indexIn() with the QLatin1StringView that you want to search
35 within.
36
37 \sa QLatin1StringView, QStringMatcher, QByteArrayMatcher
38*/
39
40/*!
41 Construct an empty Latin-1 string matcher.
42 This will match at each position in any string.
43 \sa setPattern(), setCaseSensitivity(), indexIn()
44*/
45QLatin1StringMatcher::QLatin1StringMatcher() noexcept
46 : m_pattern(),
47 m_cs(Qt::CaseSensitive),
48 m_caseSensitiveSearcher(m_pattern.data(), m_pattern.data())
49{
50}
51
52/*!
53 Constructs a Latin-1 string matcher that searches for the given \a pattern
54 with given case sensitivity \a cs. The \a pattern argument must
55 not be destroyed before this matcher object. Call indexIn()
56 to find the \a pattern in the given QLatin1StringView.
57*/
58QLatin1StringMatcher::QLatin1StringMatcher(QLatin1StringView pattern,
59 Qt::CaseSensitivity cs) noexcept
60 : m_pattern(pattern), m_cs(cs)
61{
62 setSearcher();
63}
64
65/*!
66 Destroys the Latin-1 string matcher.
67*/
68QLatin1StringMatcher::~QLatin1StringMatcher() noexcept
69{
70 freeSearcher();
71}
72
73/*!
74 \internal
75*/
76void QLatin1StringMatcher::setSearcher() noexcept
77{
78 if (m_cs == Qt::CaseSensitive) {
79 new (&m_caseSensitiveSearcher) CaseSensitiveSearcher(m_pattern.data(), m_pattern.end());
80 } else {
81 QtPrivate::QCaseInsensitiveLatin1Hash foldCase;
82 qsizetype bufferSize = std::min(a: m_pattern.size(), b: qsizetype(sizeof m_foldBuffer));
83 for (qsizetype i = 0; i < bufferSize; ++i)
84 m_foldBuffer[i] = static_cast<char>(foldCase(m_pattern[i].toLatin1()));
85
86 new (&m_caseInsensitiveSearcher)
87 CaseInsensitiveSearcher(m_foldBuffer, &m_foldBuffer[bufferSize]);
88 }
89}
90
91/*!
92 \internal
93*/
94void QLatin1StringMatcher::freeSearcher() noexcept
95{
96 if (m_cs == Qt::CaseSensitive)
97 m_caseSensitiveSearcher.~CaseSensitiveSearcher();
98 else
99 m_caseInsensitiveSearcher.~CaseInsensitiveSearcher();
100}
101
102/*!
103 Sets the \a pattern to search for. The string pointed to by the
104 QLatin1StringView must not be destroyed before the matcher is
105 destroyed, unless it is set to point to a different \a pattern
106 with longer lifetime first.
107
108 \sa pattern(), indexIn()
109*/
110void QLatin1StringMatcher::setPattern(QLatin1StringView pattern) noexcept
111{
112 if (m_pattern.latin1() == pattern.latin1() && m_pattern.size() == pattern.size())
113 return; // Same address and size
114
115 freeSearcher();
116 m_pattern = pattern;
117 setSearcher();
118}
119
120/*!
121 Returns the Latin-1 pattern that the matcher searches for.
122
123 \sa setPattern(), indexIn()
124*/
125QLatin1StringView QLatin1StringMatcher::pattern() const noexcept
126{
127 return m_pattern;
128}
129
130/*!
131 Sets the case sensitivity to \a cs.
132
133 \sa caseSensitivity(), indexIn()
134*/
135void QLatin1StringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) noexcept
136{
137 if (m_cs == cs)
138 return;
139
140 freeSearcher();
141 m_cs = cs;
142 setSearcher();
143}
144
145/*!
146 Returns the case sensitivity the matcher uses.
147
148 \sa setCaseSensitivity(), indexIn()
149*/
150Qt::CaseSensitivity QLatin1StringMatcher::caseSensitivity() const noexcept
151{
152 return m_cs;
153}
154
155/*!
156 Searches for the pattern in the given \a haystack starting from
157 \a from.
158
159 \sa caseSensitivity(), pattern()
160*/
161qsizetype QLatin1StringMatcher::indexIn(QLatin1StringView haystack, qsizetype from) const noexcept
162{
163 if (m_pattern.isEmpty() && from == haystack.size())
164 return from;
165 if (from >= haystack.size())
166 return -1;
167 auto begin = haystack.begin() + from;
168 auto end = haystack.end();
169 auto found = begin;
170 if (m_cs == Qt::CaseSensitive) {
171 found = m_caseSensitiveSearcher(begin, end, m_pattern.begin(), m_pattern.end()).begin;
172 if (found == end)
173 return -1;
174 } else {
175 const qsizetype bufferSize = std::min(a: m_pattern.size(), b: qsizetype(sizeof m_foldBuffer));
176 const QLatin1StringView restNeedle = m_pattern.sliced(pos: bufferSize);
177 const bool needleLongerThanBuffer = restNeedle.size() > 0;
178 QLatin1StringView restHaystack = haystack;
179 do {
180 found = m_caseInsensitiveSearcher(found, end, m_foldBuffer, &m_foldBuffer[bufferSize])
181 .begin;
182 if (found == end) {
183 return -1;
184 } else if (!needleLongerThanBuffer) {
185 break;
186 }
187 restHaystack = haystack.sliced(
188 pos: qMin(a: haystack.size(),
189 b: bufferSize + qsizetype(std::distance(first: haystack.begin(), last: found))));
190 if (restHaystack.startsWith(s: restNeedle, cs: Qt::CaseInsensitive))
191 break;
192 ++found;
193 } while (true);
194 }
195 return std::distance(first: haystack.begin(), last: found);
196}
197
198QT_END_NAMESPACE
199

source code of qtbase/src/corelib/text/qlatin1stringmatcher.cpp