1 | // Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | #ifndef QSTRINGTOKENIZER_H |
4 | #define QSTRINGTOKENIZER_H |
5 | |
6 | #include <QtCore/qnamespace.h> |
7 | #include <QtCore/qcontainerfwd.h> |
8 | |
9 | QT_BEGIN_NAMESPACE |
10 | |
// Declared here so that the ViewForImpl/PinForImpl specialisations further
// down in this header can name QStringBuilder.
template <typename LHS, typename RHS>
class QStringBuilder;

// When this macro is defined, QStringTokenizerBase uses a dedicated, empty
// `sentinel` class as the type returned by end()/cend(); otherwise
// `sentinel` is just an alias for `iterator`.
#define Q_STRINGTOKENIZER_USE_SENTINEL
14 | |
15 | class QStringTokenizerBaseBase |
16 | { |
17 | protected: |
18 | ~QStringTokenizerBaseBase() = default; |
19 | constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
20 | : m_sb{sb}, m_cs{cs} {} |
21 | |
22 | struct tokenizer_state { |
23 | qsizetype start, end, ; |
24 | friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept |
25 | { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; } |
26 | friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept |
27 | { return !operator==(lhs, rhs); } |
28 | }; |
29 | |
30 | Qt::SplitBehavior m_sb; |
31 | Qt::CaseSensitivity m_cs; |
32 | }; |
33 | |
34 | template <typename Haystack, typename Needle> |
35 | class QStringTokenizerBase : protected QStringTokenizerBaseBase |
36 | { |
37 | struct next_result { |
38 | Haystack value; |
39 | bool ok; |
40 | tokenizer_state state; |
41 | }; |
42 | inline next_result next(tokenizer_state state) const noexcept; |
43 | inline next_result toFront() const noexcept { return next(state: {}); } |
44 | public: |
45 | constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
46 | : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {} |
47 | |
48 | class iterator; |
49 | friend class iterator; |
50 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
51 | class sentinel { |
52 | friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; } |
53 | friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; } |
54 | }; |
55 | #else |
56 | using sentinel = iterator; |
57 | #endif |
58 | class iterator { |
59 | const QStringTokenizerBase *tokenizer; |
60 | next_result current; |
61 | friend class QStringTokenizerBase; |
62 | explicit iterator(const QStringTokenizerBase &t) noexcept |
63 | : tokenizer{&t}, current{t.toFront()} {} |
64 | public: |
65 | using difference_type = qsizetype; |
66 | using value_type = Haystack; |
67 | using pointer = const value_type*; |
68 | using reference = const value_type&; |
69 | using iterator_category = std::forward_iterator_tag; |
70 | |
71 | iterator() noexcept = default; |
72 | |
73 | // violates std::forward_iterator (returns a reference into the iterator) |
74 | [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), ¤t.value; } |
75 | [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); } |
76 | |
77 | iterator& operator++() { advance(); return *this; } |
78 | iterator operator++(int) { auto tmp = *this; advance(); return tmp; } |
79 | |
80 | friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept |
81 | { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); } |
82 | friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept |
83 | { return !operator==(lhs, rhs); } |
84 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
85 | friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept |
86 | { return !lhs.current.ok; } |
87 | friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept |
88 | { return !operator==(lhs, sentinel{}); } |
89 | friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept |
90 | { return !rhs.current.ok; } |
91 | friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept |
92 | { return !operator==(sentinel{}, rhs); } |
93 | #endif |
94 | private: |
95 | void advance() { |
96 | Q_ASSERT(current.ok); |
97 | current = tokenizer->next(current.state); |
98 | } |
99 | }; |
100 | using const_iterator = iterator; |
101 | |
102 | using size_type = std::size_t; |
103 | using difference_type = typename iterator::difference_type; |
104 | using value_type = typename iterator::value_type; |
105 | using pointer = typename iterator::pointer; |
106 | using const_pointer = pointer; |
107 | using reference = typename iterator::reference; |
108 | using const_reference = reference; |
109 | |
110 | [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; } |
111 | [[nodiscard]] iterator cbegin() const noexcept { return begin(); } |
112 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
113 | [[nodiscard]] constexpr sentinel end() const noexcept { return {}; } |
114 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
115 | [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; } |
116 | |
117 | private: |
118 | Haystack m_haystack; |
119 | Needle m_needle; |
120 | }; |
121 | |
122 | QT_BEGIN_INCLUDE_NAMESPACE |
123 | #include <QtCore/qstringview.h> |
124 | QT_END_INCLUDE_NAMESPACE |
125 | |
126 | namespace QtPrivate { |
127 | namespace Tok { |
128 | |
129 | constexpr qsizetype size(QChar) noexcept { return 1; } |
130 | template <typename String> |
131 | constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); } |
132 | |
133 | template <typename String> struct ViewForImpl {}; |
134 | template <> struct ViewForImpl<QStringView> { using type = QStringView; }; |
135 | template <> struct ViewForImpl<QLatin1StringView> { using type = QLatin1StringView; }; |
136 | template <> struct ViewForImpl<QChar> { using type = QChar; }; |
137 | template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {}; |
138 | template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {}; |
139 | template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {}; |
140 | template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {}; |
141 | template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {}; |
142 | template <typename LHS, typename RHS> |
143 | struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {}; |
144 | template <typename Char, typename...Args> |
145 | struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {}; |
146 | #ifdef __cpp_lib_string_view |
147 | template <typename Char, typename...Args> |
148 | struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {}; |
149 | #endif |
150 | |
151 | // This metafunction maps a StringLike to a View (currently, QChar, |
152 | // QStringView, QLatin1StringView). This is what QStringTokenizerBase |
153 | // operates on. QStringTokenizer adds pinning to keep rvalues alive |
154 | // for the duration of the algorithm. |
155 | template <typename String> |
156 | using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type; |
157 | |
158 | // Pinning: |
159 | // rvalues of owning string types need to be moved into QStringTokenizer |
160 | // to keep them alive for the lifetime of the tokenizer. For lvalues, we |
161 | // assume the user takes care of that. |
162 | |
163 | // default: don't pin anything (characters are pinned implicitly) |
164 | template <typename String> |
165 | struct PinForImpl { using type = ViewFor<String>; }; |
166 | |
167 | // rvalue QString -> QString |
168 | template <> |
169 | struct PinForImpl<QString> { using type = QString; }; |
170 | |
171 | // rvalue std::basic_string -> basic_string |
172 | template <typename Char, typename...Args> |
173 | struct PinForImpl<std::basic_string<Char, Args...>> |
174 | { using type = std::basic_string<Char, Args...>; }; |
175 | |
176 | // rvalue QStringBuilder -> pin as the nested ConvertTo type |
177 | template <typename LHS, typename RHS> |
178 | struct PinForImpl<QStringBuilder<LHS, RHS>> |
179 | : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {}; |
180 | |
181 | template <typename StringLike> |
182 | using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; |
183 | |
184 | template <typename T> struct is_owning_string_type : std::false_type {}; |
185 | template <> struct is_owning_string_type<QString> : std::true_type {}; |
186 | template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {}; |
187 | |
188 | // unpinned |
189 | template <typename T, bool pinned = is_owning_string_type<T>::value> |
190 | struct Pinning |
191 | { |
192 | // this is the storage for non-pinned types - no storage |
193 | constexpr Pinning(const T&) noexcept {} |
194 | // Since we don't store something, the view() method needs to be |
195 | // given something it can return. |
196 | constexpr T view(T t) const noexcept { return t; } |
197 | }; |
198 | |
199 | // pinned |
200 | template <typename T> |
201 | struct Pinning<T, true> |
202 | { |
203 | T m_string; |
204 | // specialisation for owning string types (QString, std::u16string): |
205 | // stores the string: |
206 | constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {} |
207 | // ... and thus view() uses that instead of the argument passed in: |
208 | constexpr QStringView view(const T&) const noexcept { return m_string; } |
209 | }; |
210 | |
211 | // NeedlePinning and HaystackPinning are there to distinguish them as |
212 | // base classes of QStringTokenizer. We use inheritance to reap the |
213 | // empty base class optimization. |
214 | template <typename T> |
215 | struct NeedlePinning : Pinning<T> |
216 | { |
217 | using Pinning<T>::Pinning; |
218 | template <typename Arg> |
219 | constexpr auto needleView(Arg &&a) noexcept |
220 | -> decltype(this->view(std::forward<Arg>(a))) |
221 | { return this->view(std::forward<Arg>(a)); } |
222 | }; |
223 | |
224 | template <typename T> |
225 | struct HaystackPinning : Pinning<T> |
226 | { |
227 | using Pinning<T>::Pinning; |
228 | template <typename Arg> |
229 | constexpr auto haystackView(Arg &&a) noexcept |
230 | -> decltype(this->view(std::forward<Arg>(a))) |
231 | { return this->view(std::forward<Arg>(a)); } |
232 | }; |
233 | |
234 | // The Base of a QStringTokenizer is QStringTokenizerBase for the views |
235 | // corresponding to the Haystack and Needle template arguments |
236 | // |
237 | // ie. QStringTokenizer<QString, QString> |
238 | // : QStringTokenizerBase<QStringView, QStringView> (+ pinning) |
239 | template <typename Haystack, typename Needle> |
240 | using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>; |
241 | } // namespace Tok |
242 | } // namespace QtPrivate |
243 | |
244 | template <typename Haystack, typename Needle> |
245 | class QStringTokenizer |
246 | : private QtPrivate::Tok::HaystackPinning<Haystack>, |
247 | private QtPrivate::Tok::NeedlePinning<Needle>, |
248 | public QtPrivate::Tok::TokenizerBase<Haystack, Needle> |
249 | { |
250 | using HPin = QtPrivate::Tok::HaystackPinning<Haystack>; |
251 | using NPin = QtPrivate::Tok::NeedlePinning<Needle>; |
252 | using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>; |
253 | template <typename Container, typename HPin> |
254 | struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {}; |
255 | template <typename Container> |
256 | using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type; |
257 | template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))> |
258 | using if_compatible_container = typename std::enable_if< |
259 | std::is_convertible< |
260 | typename Base::value_type, |
261 | typename std::iterator_traits<Iterator>::value_type |
262 | >::value, |
263 | bool |
264 | >::type; |
265 | public: |
266 | using value_type = typename Base::value_type; |
267 | using difference_type = typename Base::difference_type; |
268 | using size_type = typename Base::size_type; |
269 | using reference = typename Base::reference; |
270 | using const_reference = typename Base::const_reference; |
271 | using pointer = typename Base::pointer; |
272 | using const_pointer = typename Base::const_pointer; |
273 | using iterator = typename Base::iterator; |
274 | using const_iterator = typename Base::const_iterator; |
275 | using sentinel = typename Base::sentinel; |
276 | |
277 | #ifdef Q_QDOC |
278 | [[nodiscard]] iterator begin() const noexcept { return Base::begin(); } |
279 | [[nodiscard]] iterator cbegin() const noexcept { return begin(); } |
280 | [[nodiscard]] constexpr sentinel end() const noexcept { return {}; } |
281 | [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; } |
282 | #endif |
283 | |
284 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
285 | Qt::CaseSensitivity cs, |
286 | Qt::SplitBehavior sb = Qt::KeepEmptyParts) |
287 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
288 | // here, we present the haystack to Pinning<>, for optional storing. |
289 | // If it did store, haystack is moved-from and mustn't be touched |
290 | // any longer, which is why view() for these Pinning<>s ignores the |
291 | // argument. |
292 | : HPin{std::forward<Haystack>(haystack)}, |
293 | NPin{std::forward<Needle>(needle)}, |
294 | // If Pinning<> didn't store, we pass the haystack (ditto needle) |
295 | // to view() again, so it can be copied from there. |
296 | Base{this->haystackView(haystack), |
297 | this->needleView(needle), sb, cs} |
298 | {} |
299 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
300 | Qt::SplitBehavior sb = Qt::KeepEmptyParts, |
301 | Qt::CaseSensitivity cs = Qt::CaseSensitive) |
302 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
303 | : HPin{std::forward<Haystack>(haystack)}, |
304 | NPin{std::forward<Needle>(needle)}, |
305 | Base{this->haystackView(haystack), |
306 | this->needleView(needle), sb, cs} |
307 | {} |
308 | |
309 | #ifdef Q_QDOC |
310 | template<typename LContainer> LContainer toContainer(LContainer &&c = {}) const & {} |
311 | template<typename RContainer> RContainer toContainer(RContainer &&c = {}) const && {} |
312 | #else |
313 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true> |
314 | Container toContainer(Container &&c = {}) const & |
315 | { |
316 | for (auto e : *this) |
317 | c.emplace_back(e); |
318 | return std::forward<Container>(c); |
319 | } |
320 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true, |
321 | if_haystack_not_pinned<Container> = true> |
322 | Container toContainer(Container &&c = {}) const && |
323 | { |
324 | for (auto e : *this) |
325 | c.emplace_back(e); |
326 | return std::forward<Container>(c); |
327 | } |
328 | #endif |
329 | }; |
330 | |
331 | namespace QtPrivate { |
332 | namespace Tok { |
333 | // This meta function just calculated the template arguments for the |
334 | // QStringTokenizer (not -Base), based on the actual arguments passed |
335 | // to qTokenize() (or the ctor, with CTAD). It basically detects rvalue |
336 | // QString and std::basic_string and otherwise decays the arguments to |
337 | // the respective view type. |
338 | // |
339 | // #define works around a C++ restriction: [temp.deduct.guide]/3 seems |
340 | // to ask for the simple-template-id following the `->` of a deduction |
341 | // guide to be identical to the class name for which we guide deduction. |
342 | // In particular, Clang rejects a template alias there, while GCC accepts |
343 | // it. |
344 | #define Q_TOK_RESULT \ |
345 | QStringTokenizer< \ |
346 | QtPrivate::Tok::PinFor<Haystack>, \ |
347 | QtPrivate::Tok::PinFor<Needle> \ |
348 | > \ |
349 | /*end*/ |
350 | template <typename Haystack, typename Needle> |
351 | using TokenizerResult = Q_TOK_RESULT; |
352 | template <typename Haystack, typename Needle> |
353 | using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>; |
354 | } |
355 | } |
356 | |
357 | #ifdef __cpp_deduction_guides |
358 | // these tell the compiler how to determine the QStringTokenizer |
359 | // template arguments based on the constructor arguments (CTAD): |
360 | template <typename Haystack, typename Needle> |
361 | QStringTokenizer(Haystack&&, Needle&&) |
362 | -> Q_TOK_RESULT; |
363 | template <typename Haystack, typename Needle> |
364 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior) |
365 | -> Q_TOK_RESULT; |
366 | template <typename Haystack, typename Needle> |
367 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity) |
368 | -> Q_TOK_RESULT; |
369 | template <typename Haystack, typename Needle> |
370 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity) |
371 | -> Q_TOK_RESULT; |
372 | template <typename Haystack, typename Needle> |
373 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior) |
374 | -> Q_TOK_RESULT; |
375 | #endif |
376 | |
377 | #undef Q_TOK_RESULT |
378 | |
379 | template <typename Haystack, typename Needle, typename...Flags> |
380 | [[nodiscard]] constexpr auto |
381 | qTokenize(Haystack &&h, Needle &&n, Flags...flags) |
382 | noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value) |
383 | -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
384 | std::forward<Needle>(n), flags...}) |
385 | { return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
386 | std::forward<Needle>(n), |
387 | flags...}; } |
388 | |
389 | template <typename Haystack, typename Needle> |
390 | auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result |
391 | { |
392 | while (true) { |
393 | if (state.end < 0) { |
394 | // already at end: |
395 | return {{}, false, state}; |
396 | } |
397 | state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs); |
398 | Haystack result; |
399 | if (state.end >= 0) { |
400 | // token separator found => return intermediate element: |
401 | result = m_haystack.sliced(state.start, state.end - state.start); |
402 | const auto ns = QtPrivate::Tok::size(m_needle); |
403 | state.start = state.end + ns; |
404 | state.extra = (ns == 0 ? 1 : 0); |
405 | } else { |
406 | // token separator not found => return final element: |
407 | result = m_haystack.sliced(state.start); |
408 | } |
409 | if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty()) |
410 | continue; |
411 | return {result, true, state}; |
412 | } |
413 | } |
414 | |
415 | QT_END_NAMESPACE |
416 | |
417 | #endif /* QSTRINGTOKENIZER_H */ |
418 | |