1 ///////////////////////////////////////////////////////////////////////////////
2/// \file regex_token_iterator.hpp
3/// Contains the definition of regex_token_iterator, an STL-compatible iterator
4/// for tokenizing a string using a regular expression.
5//
6// Copyright 2008 Eric Niebler. Distributed under the Boost
7// Software License, Version 1.0. (See accompanying file
8// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10#ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
11#define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
12
13// MS compatible compilers support #pragma once
14#if defined(_MSC_VER)
15# pragma once
16#endif
17
18#include <vector>
19#include <boost/assert.hpp>
20#include <boost/mpl/assert.hpp>
21#include <boost/type_traits/is_same.hpp>
22#include <boost/type_traits/is_convertible.hpp>
23#include <boost/xpressive/regex_iterator.hpp>
24
25namespace boost { namespace xpressive { namespace detail
26{
27
28//////////////////////////////////////////////////////////////////////////
29// regex_token_iterator_impl
30//
31template<typename BidiIter>
32struct regex_token_iterator_impl
33 : counted_base<regex_token_iterator_impl<BidiIter> >
34{
35 typedef sub_match<BidiIter> value_type;
36
37 regex_token_iterator_impl
38 (
39 BidiIter begin
40 , BidiIter cur
41 , BidiIter end
42 , BidiIter next_search
43 , basic_regex<BidiIter> const &rex
44 , regex_constants::match_flag_type flags = regex_constants::match_default
45 , std::vector<int> subs = std::vector<int>(1, 0)
46 , int n = -2
47 , bool not_null = false
48 )
49 : iter_(begin, cur, end, next_search, rex, flags, not_null)
50 , result_()
51 , n_((-2 == n) ? (int)subs.size() - 1 : n)
52 , subs_()
53 {
54 BOOST_ASSERT(0 != subs.size());
55 this->subs_.swap(subs);
56 }
57
58 bool next()
59 {
60 if(-1 != this->n_)
61 {
62 BidiIter cur = this->iter_.state_.cur_;
63 if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next())
64 {
65 this->result_ = (-1 == this->subs_[ this->n_ ])
66 ? this->iter_.what_.prefix()
67 : this->iter_.what_[ this->subs_[ this->n_ ] ];
68 return true;
69 }
70 else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_)
71 {
72 this->result_ = value_type(cur, this->iter_.state_.end_, true);
73 return true;
74 }
75 }
76
77 return false;
78 }
79
80 bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const
81 {
82 return this->iter_.equal_to(that.iter_) && this->n_ == that.n_;
83 }
84
85 regex_iterator_impl<BidiIter> iter_;
86 value_type result_;
87 int n_;
88 std::vector<int> subs_;
89};
90
/// Returns the mark number designated by a plain integer, which is the
/// integer itself.
inline int get_mark_number(int i)
{
    int const mark_number = i;
    return mark_number;
}
95
/// Wraps a single sub-match index in a one-element vector.
inline std::vector<int> to_vector(int subs)
{
    std::vector<int> single;
    single.push_back(subs);
    return single;
}
100
/// A vector of int already has the required form; hand back a reference to
/// the very same object without copying it.
inline std::vector<int> const &to_vector(std::vector<int> const &subs)
{
    std::vector<int> const &same_vector = subs;
    return same_vector;
}
105
106template<typename Int, std::size_t Size>
107inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ])
108{
109 // so that people can specify sub-match indices inline with
110 // string literals, like "\1\2\3", leave off the trailing '\0'
111 std::size_t const size = Size - is_same<Int, char>::value;
112 std::vector<int> vect(size);
113 for(std::size_t i = 0; i < size; ++i)
114 {
115 vect[i] = get_mark_number(sub_matches[i]);
116 }
117 return vect;
118}
119
120template<typename Int>
121inline std::vector<int> to_vector(std::vector<Int> const &sub_matches)
122{
123 BOOST_MPL_ASSERT((is_convertible<Int, int>));
124 return std::vector<int>(sub_matches.begin(), sub_matches.end());
125}
126
127} // namespace detail
128
129//////////////////////////////////////////////////////////////////////////
130// regex_token_iterator
131//
132template<typename BidiIter>
133struct regex_token_iterator
134{
135 typedef basic_regex<BidiIter> regex_type;
136 typedef typename iterator_value<BidiIter>::type char_type;
137 typedef sub_match<BidiIter> value_type;
138 typedef std::ptrdiff_t difference_type;
139 typedef value_type const *pointer;
140 typedef value_type const &reference;
141 typedef std::forward_iterator_tag iterator_category;
142
143 /// INTERNAL ONLY
144 typedef detail::regex_token_iterator_impl<BidiIter> impl_type_;
145
146 /// \post \c *this is the end of sequence iterator.
147 regex_token_iterator()
148 : impl_()
149 {
150 }
151
152 /// \param begin The beginning of the character range to search.
153 /// \param end The end of the character range to search.
154 /// \param rex The regex pattern to search for.
155 /// \pre \c [begin,end) is a valid range.
156 regex_token_iterator
157 (
158 BidiIter begin
159 , BidiIter end
160 , basic_regex<BidiIter> const &rex
161 )
162 : impl_()
163 {
164 if(0 != rex.regex_id())
165 {
166 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
167 this->next_();
168 }
169 }
170
171 /// \param begin The beginning of the character range to search.
172 /// \param end The end of the character range to search.
173 /// \param rex The regex pattern to search for.
174 /// \param args A let() expression with argument bindings for semantic actions.
175 /// \pre \c [begin,end) is a valid range.
176 template<typename LetExpr>
177 regex_token_iterator
178 (
179 BidiIter begin
180 , BidiIter end
181 , basic_regex<BidiIter> const &rex
182 , detail::let_<LetExpr> const &args
183 )
184 : impl_()
185 {
186 if(0 != rex.regex_id())
187 {
188 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
189 detail::bind_args(args, this->impl_->iter_.what_);
190 this->next_();
191 }
192 }
193
194 /// \param begin The beginning of the character range to search.
195 /// \param end The end of the character range to search.
196 /// \param rex The regex pattern to search for.
197 /// \param subs A range of integers designating sub-matches to be treated as tokens.
198 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
199 /// \pre \c [begin,end) is a valid range.
200 /// \pre \c subs is either an integer greater or equal to -1,
201 /// or else an array or non-empty \c std::vector\<\> of such integers.
202 template<typename Subs>
203 regex_token_iterator
204 (
205 BidiIter begin
206 , BidiIter end
207 , basic_regex<BidiIter> const &rex
208 , Subs const &subs
209 , regex_constants::match_flag_type flags = regex_constants::match_default
210 )
211 : impl_()
212 {
213 if(0 != rex.regex_id())
214 {
215 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
216 this->next_();
217 }
218 }
219
220 /// \param begin The beginning of the character range to search.
221 /// \param end The end of the character range to search.
222 /// \param rex The regex pattern to search for.
223 /// \param subs A range of integers designating sub-matches to be treated as tokens.
224 /// \param args A let() expression with argument bindings for semantic actions.
225 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
226 /// \pre \c [begin,end) is a valid range.
227 /// \pre \c subs is either an integer greater or equal to -1,
228 /// or else an array or non-empty \c std::vector\<\> of such integers.
229 template<typename Subs, typename LetExpr>
230 regex_token_iterator
231 (
232 BidiIter begin
233 , BidiIter end
234 , basic_regex<BidiIter> const &rex
235 , Subs const &subs
236 , detail::let_<LetExpr> const &args
237 , regex_constants::match_flag_type flags = regex_constants::match_default
238 )
239 : impl_()
240 {
241 if(0 != rex.regex_id())
242 {
243 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
244 detail::bind_args(args, this->impl_->iter_.what_);
245 this->next_();
246 }
247 }
248
249 /// \post <tt>*this == that</tt>
250 regex_token_iterator(regex_token_iterator<BidiIter> const &that)
251 : impl_(that.impl_) // COW
252 {
253 }
254
255 /// \post <tt>*this == that</tt>
256 regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that)
257 {
258 this->impl_ = that.impl_; // COW
259 return *this;
260 }
261
262 friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
263 {
264 if(!left.impl_ || !right.impl_)
265 {
266 return !left.impl_ && !right.impl_;
267 }
268
269 return left.impl_->equal_to(*right.impl_);
270 }
271
272 friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
273 {
274 return !(left == right);
275 }
276
277 value_type const &operator *() const
278 {
279 return this->impl_->result_;
280 }
281
282 value_type const *operator ->() const
283 {
284 return &this->impl_->result_;
285 }
286
287 /// If N == -1 then sets *this equal to the end of sequence iterator.
288 /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to
289 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
290 /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is
291 /// not set in flags then sets it. Then locates the next match as if by calling
292 /// regex_search(what[0].second, end, what, *pre, flags), with the following variation:
293 /// in the event that the previous match found was of zero length (what[0].length() == 0)
294 /// then attempts to find a non-zero length match starting at what[0].second, only if that
295 /// fails and provided what[0].second != suffix().second does it look for a (possibly zero
296 /// length) match starting from what[0].second + 1. If such a match is found then sets N
297 /// equal to zero, and sets result equal to
298 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
299 /// Otherwise if no further matches were found, then let last_end be the endpoint of the last
300 /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and
301 /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end
302 /// of sequence iterator.
303 regex_token_iterator<BidiIter> &operator ++()
304 {
305 this->fork_(); // un-share the implementation
306 this->next_();
307 return *this;
308 }
309
310 regex_token_iterator<BidiIter> operator ++(int)
311 {
312 regex_token_iterator<BidiIter> tmp(*this);
313 ++*this;
314 return tmp;
315 }
316
317private:
318
319 /// INTERNAL ONLY
320 void fork_()
321 {
322 if(1 != this->impl_->use_count())
323 {
324 intrusive_ptr<impl_type_> clone = new impl_type_
325 (
326 this->impl_->iter_.state_.begin_
327 , this->impl_->iter_.state_.cur_
328 , this->impl_->iter_.state_.end_
329 , this->impl_->iter_.state_.next_search_
330 , this->impl_->iter_.rex_
331 , this->impl_->iter_.flags_
332 , this->impl_->subs_
333 , this->impl_->n_
334 , this->impl_->iter_.not_null_
335 );
336
337 // only copy the match_results struct if we have to. Note: if the next call
338 // to impl_->next() will return false or call regex_search, we don't need to
339 // copy the match_results struct.
340 if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size()))
341 {
342 // BUGBUG This is expensive -- it causes the sequence_stack to be cleared.
343 // Find a better way
344 clone->iter_.what_ = this->impl_->iter_.what_;
345 }
346 else
347 {
348 // At the very least, copy the action args
349 detail::core_access<BidiIter>::get_action_args(clone->iter_.what_)
350 = detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_);
351 }
352
353 this->impl_.swap(clone);
354 }
355 }
356
357 /// INTERNAL ONLY
358 void next_()
359 {
360 BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count());
361 if(!this->impl_->next())
362 {
363 this->impl_ = 0;
364 }
365 }
366
367 intrusive_ptr<impl_type_> impl_;
368};
369
370}} // namespace boost::xpressive
371
372#endif
373

source code of boost/boost/xpressive/regex_token_iterator.hpp