1// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3#ifndef QSTRINGTOKENIZER_H
4#define QSTRINGTOKENIZER_H
5
6#include <QtCore/qnamespace.h>
7#include <QtCore/qcontainerfwd.h>
8
9QT_BEGIN_NAMESPACE
10
11template <typename, typename> class QStringBuilder;
12
13#define Q_STRINGTOKENIZER_USE_SENTINEL
14
15class QStringTokenizerBaseBase
16{
17protected:
18 ~QStringTokenizerBaseBase() = default;
19 constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
20 : m_sb{sb}, m_cs{cs} {}
21
22 struct tokenizer_state {
23 qsizetype start, end, extra;
24 friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
25 { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; }
26 friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
27 { return !operator==(lhs, rhs); }
28 };
29
30 Qt::SplitBehavior m_sb;
31 Qt::CaseSensitivity m_cs;
32};
33
34template <typename Haystack, typename Needle>
35class QStringTokenizerBase : protected QStringTokenizerBaseBase
36{
37 struct next_result {
38 Haystack value;
39 bool ok;
40 tokenizer_state state;
41 };
42 inline next_result next(tokenizer_state state) const noexcept;
43 inline next_result toFront() const noexcept { return next(state: {}); }
44public:
45 constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
46 : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {}
47
48 class iterator;
49 friend class iterator;
50#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
51 class sentinel {
52 friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; }
53 friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; }
54 };
55#else
56 using sentinel = iterator;
57#endif
58 class iterator {
59 const QStringTokenizerBase *tokenizer;
60 next_result current;
61 friend class QStringTokenizerBase;
62 explicit iterator(const QStringTokenizerBase &t) noexcept
63 : tokenizer{&t}, current{t.toFront()} {}
64 public:
65 using difference_type = qsizetype;
66 using value_type = Haystack;
67 using pointer = const value_type*;
68 using reference = const value_type&;
69 using iterator_category = std::forward_iterator_tag;
70
71 iterator() noexcept = default;
72
73 // violates std::forward_iterator (returns a reference into the iterator)
74 [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), &current.value; }
75 [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); }
76
77 iterator& operator++() { advance(); return *this; }
78 iterator operator++(int) { auto tmp = *this; advance(); return tmp; }
79
80 friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
81 { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); }
82 friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
83 { return !operator==(lhs, rhs); }
84#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
85 friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
86 { return !lhs.current.ok; }
87 friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
88 { return !operator==(lhs, sentinel{}); }
89 friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
90 { return !rhs.current.ok; }
91 friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
92 { return !operator==(sentinel{}, rhs); }
93#endif
94 private:
95 void advance() {
96 Q_ASSERT(current.ok);
97 current = tokenizer->next(current.state);
98 }
99 };
100 using const_iterator = iterator;
101
102 using size_type = std::size_t;
103 using difference_type = typename iterator::difference_type;
104 using value_type = typename iterator::value_type;
105 using pointer = typename iterator::pointer;
106 using const_pointer = pointer;
107 using reference = typename iterator::reference;
108 using const_reference = reference;
109
110 [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; }
111 [[nodiscard]] iterator cbegin() const noexcept { return begin(); }
112 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
113 [[nodiscard]] constexpr sentinel end() const noexcept { return {}; }
114 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
115 [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; }
116
117private:
118 Haystack m_haystack;
119 Needle m_needle;
120};
121
122QT_BEGIN_INCLUDE_NAMESPACE
123#include <QtCore/qstringview.h>
124QT_END_INCLUDE_NAMESPACE
125
126namespace QtPrivate {
127namespace Tok {
128
129 constexpr qsizetype size(QChar) noexcept { return 1; }
130 template <typename String>
131 constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); }
132
133 template <typename String> struct ViewForImpl {};
134 template <> struct ViewForImpl<QStringView> { using type = QStringView; };
135 template <> struct ViewForImpl<QLatin1StringView> { using type = QLatin1StringView; };
136 template <> struct ViewForImpl<QChar> { using type = QChar; };
137 template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {};
138 template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {};
139 template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {};
140 template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {};
141 template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {};
142 template <typename LHS, typename RHS>
143 struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {};
144 template <typename Char, typename...Args>
145 struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {};
146#ifdef __cpp_lib_string_view
147 template <typename Char, typename...Args>
148 struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {};
149#endif
150
151 // This metafunction maps a StringLike to a View (currently, QChar,
152 // QStringView, QLatin1StringView). This is what QStringTokenizerBase
153 // operates on. QStringTokenizer adds pinning to keep rvalues alive
154 // for the duration of the algorithm.
155 template <typename String>
156 using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
157
158 // Pinning:
159 // rvalues of owning string types need to be moved into QStringTokenizer
160 // to keep them alive for the lifetime of the tokenizer. For lvalues, we
161 // assume the user takes care of that.
162
163 // default: don't pin anything (characters are pinned implicitly)
164 template <typename String>
165 struct PinForImpl { using type = ViewFor<String>; };
166
167 // rvalue QString -> QString
168 template <>
169 struct PinForImpl<QString> { using type = QString; };
170
171 // rvalue std::basic_string -> basic_string
172 template <typename Char, typename...Args>
173 struct PinForImpl<std::basic_string<Char, Args...>>
174 { using type = std::basic_string<Char, Args...>; };
175
176 // rvalue QStringBuilder -> pin as the nested ConvertTo type
177 template <typename LHS, typename RHS>
178 struct PinForImpl<QStringBuilder<LHS, RHS>>
179 : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {};
180
181 template <typename StringLike>
182 using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type;
183
184 template <typename T> struct is_owning_string_type : std::false_type {};
185 template <> struct is_owning_string_type<QString> : std::true_type {};
186 template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {};
187
188 // unpinned
189 template <typename T, bool pinned = is_owning_string_type<T>::value>
190 struct Pinning
191 {
192 // this is the storage for non-pinned types - no storage
193 constexpr Pinning(const T&) noexcept {}
194 // Since we don't store something, the view() method needs to be
195 // given something it can return.
196 constexpr T view(T t) const noexcept { return t; }
197 };
198
199 // pinned
200 template <typename T>
201 struct Pinning<T, true>
202 {
203 T m_string;
204 // specialisation for owning string types (QString, std::u16string):
205 // stores the string:
206 constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {}
207 // ... and thus view() uses that instead of the argument passed in:
208 constexpr QStringView view(const T&) const noexcept { return m_string; }
209 };
210
211 // NeedlePinning and HaystackPinning are there to distinguish them as
212 // base classes of QStringTokenizer. We use inheritance to reap the
213 // empty base class optimization.
214 template <typename T>
215 struct NeedlePinning : Pinning<T>
216 {
217 using Pinning<T>::Pinning;
218 template <typename Arg>
219 constexpr auto needleView(Arg &&a) noexcept
220 -> decltype(this->view(std::forward<Arg>(a)))
221 { return this->view(std::forward<Arg>(a)); }
222 };
223
224 template <typename T>
225 struct HaystackPinning : Pinning<T>
226 {
227 using Pinning<T>::Pinning;
228 template <typename Arg>
229 constexpr auto haystackView(Arg &&a) noexcept
230 -> decltype(this->view(std::forward<Arg>(a)))
231 { return this->view(std::forward<Arg>(a)); }
232 };
233
234 // The Base of a QStringTokenizer is QStringTokenizerBase for the views
235 // corresponding to the Haystack and Needle template arguments
236 //
237 // ie. QStringTokenizer<QString, QString>
238 // : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
239 template <typename Haystack, typename Needle>
240 using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
241} // namespace Tok
242} // namespace QtPrivate
243
244template <typename Haystack, typename Needle>
245class QStringTokenizer
246 : private QtPrivate::Tok::HaystackPinning<Haystack>,
247 private QtPrivate::Tok::NeedlePinning<Needle>,
248 public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
249{
250 using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
251 using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
252 using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
253 template <typename Container, typename HPin>
254 struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {};
255 template <typename Container>
256 using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
257 template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
258 using if_compatible_container = typename std::enable_if<
259 std::is_convertible<
260 typename Base::value_type,
261 typename std::iterator_traits<Iterator>::value_type
262 >::value,
263 bool
264 >::type;
265public:
266 using value_type = typename Base::value_type;
267 using difference_type = typename Base::difference_type;
268 using size_type = typename Base::size_type;
269 using reference = typename Base::reference;
270 using const_reference = typename Base::const_reference;
271 using pointer = typename Base::pointer;
272 using const_pointer = typename Base::const_pointer;
273 using iterator = typename Base::iterator;
274 using const_iterator = typename Base::const_iterator;
275 using sentinel = typename Base::sentinel;
276
277#ifdef Q_QDOC
278 [[nodiscard]] iterator begin() const noexcept { return Base::begin(); }
279 [[nodiscard]] iterator cbegin() const noexcept { return begin(); }
280 [[nodiscard]] constexpr sentinel end() const noexcept { return {}; }
281 [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; }
282#endif
283
284 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
285 Qt::CaseSensitivity cs,
286 Qt::SplitBehavior sb = Qt::KeepEmptyParts)
287 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
288 // here, we present the haystack to Pinning<>, for optional storing.
289 // If it did store, haystack is moved-from and mustn't be touched
290 // any longer, which is why view() for these Pinning<>s ignores the
291 // argument.
292 : HPin{std::forward<Haystack>(haystack)},
293 NPin{std::forward<Needle>(needle)},
294 // If Pinning<> didn't store, we pass the haystack (ditto needle)
295 // to view() again, so it can be copied from there.
296 Base{this->haystackView(haystack),
297 this->needleView(needle), sb, cs}
298 {}
299 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
300 Qt::SplitBehavior sb = Qt::KeepEmptyParts,
301 Qt::CaseSensitivity cs = Qt::CaseSensitive)
302 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
303 : HPin{std::forward<Haystack>(haystack)},
304 NPin{std::forward<Needle>(needle)},
305 Base{this->haystackView(haystack),
306 this->needleView(needle), sb, cs}
307 {}
308
309#ifdef Q_QDOC
310 template<typename LContainer> LContainer toContainer(LContainer &&c = {}) const & {}
311 template<typename RContainer> RContainer toContainer(RContainer &&c = {}) const && {}
312#else
313 template<typename Container = QList<value_type>, if_compatible_container<Container> = true>
314 Container toContainer(Container &&c = {}) const &
315 {
316 for (auto e : *this)
317 c.emplace_back(e);
318 return std::forward<Container>(c);
319 }
320 template<typename Container = QList<value_type>, if_compatible_container<Container> = true,
321 if_haystack_not_pinned<Container> = true>
322 Container toContainer(Container &&c = {}) const &&
323 {
324 for (auto e : *this)
325 c.emplace_back(e);
326 return std::forward<Container>(c);
327 }
328#endif
329};
330
331namespace QtPrivate {
332namespace Tok {
333// This meta function just calculated the template arguments for the
334// QStringTokenizer (not -Base), based on the actual arguments passed
335// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
336// QString and std::basic_string and otherwise decays the arguments to
337// the respective view type.
338//
339// #define works around a C++ restriction: [temp.deduct.guide]/3 seems
340// to ask for the simple-template-id following the `->` of a deduction
341// guide to be identical to the class name for which we guide deduction.
342// In particular, Clang rejects a template alias there, while GCC accepts
343// it.
344#define Q_TOK_RESULT \
345 QStringTokenizer< \
346 QtPrivate::Tok::PinFor<Haystack>, \
347 QtPrivate::Tok::PinFor<Needle> \
348 > \
349 /*end*/
350template <typename Haystack, typename Needle>
351using TokenizerResult = Q_TOK_RESULT;
352template <typename Haystack, typename Needle>
353using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
354}
355}
356
357#ifdef __cpp_deduction_guides
358// these tell the compiler how to determine the QStringTokenizer
359// template arguments based on the constructor arguments (CTAD):
360template <typename Haystack, typename Needle>
361QStringTokenizer(Haystack&&, Needle&&)
362 -> Q_TOK_RESULT;
363template <typename Haystack, typename Needle>
364QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior)
365 -> Q_TOK_RESULT;
366template <typename Haystack, typename Needle>
367QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity)
368 -> Q_TOK_RESULT;
369template <typename Haystack, typename Needle>
370QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity)
371 -> Q_TOK_RESULT;
372template <typename Haystack, typename Needle>
373QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior)
374 -> Q_TOK_RESULT;
375#endif
376
377#undef Q_TOK_RESULT
378
379template <typename Haystack, typename Needle, typename...Flags>
380[[nodiscard]] constexpr auto
381qTokenize(Haystack &&h, Needle &&n, Flags...flags)
382 noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value)
383 -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
384 std::forward<Needle>(n), flags...})
385{ return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
386 std::forward<Needle>(n),
387 flags...}; }
388
389template <typename Haystack, typename Needle>
390auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
391{
392 while (true) {
393 if (state.end < 0) {
394 // already at end:
395 return {{}, false, state};
396 }
397 state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs);
398 Haystack result;
399 if (state.end >= 0) {
400 // token separator found => return intermediate element:
401 result = m_haystack.sliced(state.start, state.end - state.start);
402 const auto ns = QtPrivate::Tok::size(m_needle);
403 state.start = state.end + ns;
404 state.extra = (ns == 0 ? 1 : 0);
405 } else {
406 // token separator not found => return final element:
407 result = m_haystack.sliced(state.start);
408 }
409 if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty())
410 continue;
411 return {result, true, state};
412 }
413}
414
415QT_END_NAMESPACE
416
417#endif /* QSTRINGTOKENIZER_H */
418

// Source: qtbase/src/corelib/text/qstringtokenizer.h