1///////////////////////////////////////////////////////////////////////////////
2/// \file regex_compiler.hpp
3/// Contains the definition of regex_compiler, a factory for building regex objects
4/// from strings.
5//
6// Copyright 2008 Eric Niebler. Distributed under the Boost
7// Software License, Version 1.0. (See accompanying file
8// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10#ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
11#define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
12
13// MS compatible compilers support #pragma once
14#if defined(_MSC_VER)
15# pragma once
16#endif
17
18#include <map>
19#include <boost/config.hpp>
20#include <boost/assert.hpp>
21#include <boost/next_prior.hpp>
22#include <boost/range/begin.hpp>
23#include <boost/range/end.hpp>
24#include <boost/mpl/assert.hpp>
25#include <boost/throw_exception.hpp>
26#include <boost/type_traits/is_same.hpp>
27#include <boost/type_traits/is_pointer.hpp>
28#include <boost/utility/enable_if.hpp>
29#include <boost/iterator/iterator_traits.hpp>
30#include <boost/xpressive/basic_regex.hpp>
31#include <boost/xpressive/detail/dynamic/parser.hpp>
32#include <boost/xpressive/detail/dynamic/parse_charset.hpp>
33#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
34#include <boost/xpressive/detail/dynamic/parser_traits.hpp>
35#include <boost/xpressive/detail/core/linker.hpp>
36#include <boost/xpressive/detail/core/optimize.hpp>
37
38namespace boost { namespace xpressive
39{
40
41///////////////////////////////////////////////////////////////////////////////
42// regex_compiler
43//
44/// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
45///
46/// Class template regex_compiler is used to construct a basic_regex object from a string. The string
47/// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
48/// after which all basic_regex objects created with that regex_compiler object will use that locale.
/// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
50/// compile() method to construct a basic_regex object, passing it the string representing the regular
51/// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
52/// objects compiled from the same string will have different regex_id's.
53template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
54struct regex_compiler
55{
// Public member types.
/// The iterator type that the produced basic_regex objects are parameterized on.
typedef BidiIter iterator_type;
/// The character type, i.e. the value type of BidiIter.
typedef typename iterator_value<BidiIter>::type char_type;
/// The bitmask type accepted as the \c flags argument of compile().
typedef regex_constants::syntax_option_type flag_type;
/// The regex traits type supplied as the RegexTraits template argument.
typedef RegexTraits traits_type;
/// The string type published by the traits; used for rule names and literals.
typedef typename traits_type::string_type string_type;
/// The locale type published by the traits; see imbue() and getloc().
typedef typename traits_type::locale_type locale_type;
/// The character-class mask type published by the traits.
typedef typename traits_type::char_class_type char_class_type;
63
64 explicit regex_compiler(RegexTraits const &traits = RegexTraits())
65 : mark_count_(0)
66 , hidden_mark_count_(0)
67 , traits_(traits)
68 , upper_(0)
69 , self_()
70 , rules_()
71 {
72 this->upper_ = lookup_classname(this->rxtraits(), "upper");
73 }
74
75 ///////////////////////////////////////////////////////////////////////////
76 // imbue
77 /// Specify the locale to be used by a regex_compiler.
78 ///
79 /// \param loc The locale that this regex_compiler should use.
80 /// \return The previous locale.
81 locale_type imbue(locale_type loc)
82 {
83 locale_type oldloc = this->traits_.imbue(loc);
84 this->upper_ = lookup_classname(this->rxtraits(), "upper");
85 return oldloc;
86 }
87
88 ///////////////////////////////////////////////////////////////////////////
89 // getloc
90 /// Get the locale used by a regex_compiler.
91 ///
92 /// \return The locale used by this regex_compiler.
93 locale_type getloc() const
94 {
95 return this->traits_.getloc();
96 }
97
98 ///////////////////////////////////////////////////////////////////////////
99 // compile
100 /// Builds a basic_regex object from a range of characters.
101 ///
102 /// \param begin The beginning of a range of characters representing the
103 /// regular expression to compile.
104 /// \param end The end of a range of characters representing the
105 /// regular expression to compile.
106 /// \param flags Optional bitmask that determines how the pat string is
107 /// interpreted. (See syntax_option_type.)
108 /// \return A basic_regex object corresponding to the regular expression
109 /// represented by the character range.
110 /// \pre InputIter is a model of the InputIterator concept.
111 /// \pre [begin,end) is a valid range.
112 /// \pre The range of characters specified by [begin,end) contains a
113 /// valid string-based representation of a regular expression.
114 /// \throw regex_error when the range of characters has invalid regular
115 /// expression syntax.
116 template<typename InputIter>
117 basic_regex<BidiIter>
118 compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
119 {
120 typedef typename iterator_category<InputIter>::type category;
121 return this->compile_(begin, end, flags, category());
122 }
123
124 /// \overload
125 ///
126 template<typename InputRange>
127 typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
128 compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
129 {
130 return this->compile(boost::begin(pat), boost::end(pat), flags);
131 }
132
133 /// \overload
134 ///
135 basic_regex<BidiIter>
136 compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
137 {
138 BOOST_ASSERT(0 != begin);
139 char_type const *end = begin + std::char_traits<char_type>::length(begin);
140 return this->compile(begin, end, flags);
141 }
142
143 /// \overload
144 ///
145 basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
146 {
147 BOOST_ASSERT(0 != begin);
148 char_type const *end = begin + size;
149 return this->compile(begin, end, flags);
150 }
151
152 ///////////////////////////////////////////////////////////////////////////
153 // operator[]
154 /// Return a reference to the named regular expression. If no such named
155 /// regular expression exists, create a new regular expression and return
156 /// a reference to it.
157 ///
158 /// \param name A std::string containing the name of the regular expression.
159 /// \pre The string is not empty.
160 /// \throw bad_alloc on allocation failure.
161 basic_regex<BidiIter> &operator [](string_type const &name)
162 {
163 BOOST_ASSERT(!name.empty());
164 return this->rules_[name];
165 }
166
167 /// \overload
168 ///
169 basic_regex<BidiIter> const &operator [](string_type const &name) const
170 {
171 BOOST_ASSERT(!name.empty());
172 return this->rules_[name];
173 }
174
175private:
176
// Result type of parse_escape(): describes one parsed escape sequence.
typedef detail::escape_value<char_type, char_class_type> escape_value;
// Matcher that parse_alternates() places in front of a list of alternates.
typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
179
180 ///////////////////////////////////////////////////////////////////////////
181 // compile_
182 /// INTERNAL ONLY
183 template<typename FwdIter>
184 basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
185 {
186 BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
187 using namespace regex_constants;
188 this->reset();
189 this->traits_.flags(flags);
190
191 basic_regex<BidiIter> rextmp, *prex = &rextmp;
192 FwdIter tmp = begin;
193
194 // Check if this regex is a named rule:
195 string_type name;
196 if(token_group_begin == this->traits_.get_token(tmp, end) &&
197 BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
198 token_rule_assign == this->traits_.get_group_type(tmp, end, name))
199 {
200 begin = tmp;
201 BOOST_XPR_ENSURE_
202 (
203 begin != end && token_group_end == this->traits_.get_token(begin, end)
204 , error_paren
205 , "mismatched parenthesis"
206 );
207 prex = &this->rules_[name];
208 }
209
210 this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
211
212 // at the top level, a regex is a sequence of alternates
213 detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
214 BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
215
216 // terminate the sequence
217 seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
218
219 // bundle the regex information into a regex_impl object
220 detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
221
222 this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
223 this->self_->mark_count_ = this->mark_count_;
224 this->self_->hidden_mark_count_ = this->hidden_mark_count_;
225
226 // References changed, update dependencies.
227 this->self_->tracking_update();
228 this->self_.reset();
229 return *prex;
230 }
231
232 ///////////////////////////////////////////////////////////////////////////
233 // compile_
234 /// INTERNAL ONLY
235 template<typename InputIter>
236 basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
237 {
238 string_type pat(begin, end);
239 return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
240 }
241
242 ///////////////////////////////////////////////////////////////////////////
243 // reset
244 /// INTERNAL ONLY
245 void reset()
246 {
247 this->mark_count_ = 0;
248 this->hidden_mark_count_ = 0;
249 this->traits_.flags(regex_constants::ECMAScript);
250 }
251
252 ///////////////////////////////////////////////////////////////////////////
253 // regex_traits
254 /// INTERNAL ONLY
255 traits_type &rxtraits()
256 {
257 return this->traits_.traits();
258 }
259
260 ///////////////////////////////////////////////////////////////////////////
261 // regex_traits
262 /// INTERNAL ONLY
263 traits_type const &rxtraits() const
264 {
265 return this->traits_.traits();
266 }
267
268 ///////////////////////////////////////////////////////////////////////////
269 // parse_alternates
270 /// INTERNAL ONLY
271 template<typename FwdIter>
272 detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
273 {
274 using namespace regex_constants;
275 int count = 0;
276 FwdIter tmp = begin;
277 detail::sequence<BidiIter> seq;
278
279 do switch(++count)
280 {
281 case 1:
282 seq = this->parse_sequence(tmp, end);
283 break;
284 case 2:
285 seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
286 BOOST_FALLTHROUGH;
287 default:
288 seq |= this->parse_sequence(tmp, end);
289 }
290 while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
291
292 return seq;
293 }
294
295 ///////////////////////////////////////////////////////////////////////////
296 // parse_group
297 /// INTERNAL ONLY
298 template<typename FwdIter>
299 detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
300 {
301 using namespace regex_constants;
302 int mark_nbr = 0;
303 bool keeper = false;
304 bool lookahead = false;
305 bool lookbehind = false;
306 bool negative = false;
307 string_type name;
308
309 detail::sequence<BidiIter> seq, seq_end;
310 FwdIter tmp = FwdIter();
311
312 syntax_option_type old_flags = this->traits_.flags();
313
314 switch(this->traits_.get_group_type(begin, end, name))
315 {
316 case token_no_mark:
317 // Don't process empty groups like (?:) or (?i)
318 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
319 if(token_group_end == this->traits_.get_token(tmp = begin, end))
320 {
321 return this->parse_atom(begin = tmp, end);
322 }
323 break;
324
325 case token_negative_lookahead:
326 negative = true;
327 BOOST_FALLTHROUGH;
328 case token_positive_lookahead:
329 lookahead = true;
330 break;
331
332 case token_negative_lookbehind:
333 negative = true;
334 BOOST_FALLTHROUGH;
335 case token_positive_lookbehind:
336 lookbehind = true;
337 break;
338
339 case token_independent_sub_expression:
340 keeper = true;
341 break;
342
343 case token_comment:
344 while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
345 {
346 switch(this->traits_.get_token(begin, end))
347 {
348 case token_group_end:
349 return this->parse_atom(begin, end);
350 case token_escape:
351 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
352 BOOST_FALLTHROUGH;
353 case token_literal:
354 ++begin;
355 break;
356 default:
357 break;
358 }
359 }
360 break;
361
362 case token_recurse:
363 BOOST_XPR_ENSURE_
364 (
365 begin != end && token_group_end == this->traits_.get_token(begin, end)
366 , error_paren
367 , "mismatched parenthesis"
368 );
369 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
370
371 case token_rule_assign:
372 BOOST_THROW_EXCEPTION(
373 regex_error(error_badrule, "rule assignments must be at the front of the regex")
374 );
375 break;
376
377 case token_rule_ref:
378 {
379 typedef detail::core_access<BidiIter> access;
380 BOOST_XPR_ENSURE_
381 (
382 begin != end && token_group_end == this->traits_.get_token(begin, end)
383 , error_paren
384 , "mismatched parenthesis"
385 );
386 basic_regex<BidiIter> &rex = this->rules_[name];
387 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
388 this->self_->track_reference(*impl);
389 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
390 }
391
392 case token_named_mark:
393 mark_nbr = static_cast<int>(++this->mark_count_);
394 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
395 {
396 BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
397 }
398 this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
399 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
400 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
401 break;
402
403 case token_named_mark_ref:
404 BOOST_XPR_ENSURE_
405 (
406 begin != end && token_group_end == this->traits_.get_token(begin, end)
407 , error_paren
408 , "mismatched parenthesis"
409 );
410 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
411 {
412 if(this->self_->named_marks_[i].name_ == name)
413 {
414 mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
415 return detail::make_backref_xpression<BidiIter>
416 (
417 mark_nbr, this->traits_.flags(), this->rxtraits()
418 );
419 }
420 }
421 BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
422 break;
423
424 default:
425 mark_nbr = static_cast<int>(++this->mark_count_);
426 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
427 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
428 break;
429 }
430
431 // alternates
432 seq += this->parse_alternates(begin, end);
433 seq += seq_end;
434 BOOST_XPR_ENSURE_
435 (
436 begin != end && token_group_end == this->traits_.get_token(begin, end)
437 , error_paren
438 , "mismatched parenthesis"
439 );
440
441 typedef detail::shared_matchable<BidiIter> xpr_type;
442 if(lookahead)
443 {
444 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
445 detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure());
446 seq = detail::make_dynamic<BidiIter>(lam);
447 }
448 else if(lookbehind)
449 {
450 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
451 detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure());
452 seq = detail::make_dynamic<BidiIter>(lbm);
453 }
454 else if(keeper) // independent sub-expression
455 {
456 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
457 detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure());
458 seq = detail::make_dynamic<BidiIter>(km);
459 }
460
461 // restore the modifiers
462 this->traits_.flags(old_flags);
463 return seq;
464 }
465
466 ///////////////////////////////////////////////////////////////////////////
467 // parse_charset
468 /// INTERNAL ONLY
469 template<typename FwdIter>
470 detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
471 {
472 detail::compound_charset<traits_type> chset;
473
474 // call out to a helper to actually parse the character set
475 detail::parse_charset(begin, end, chset, this->traits_);
476
477 return detail::make_charset_xpression<BidiIter>
478 (
479 chset
480 , this->rxtraits()
481 , this->traits_.flags()
482 );
483 }
484
485 ///////////////////////////////////////////////////////////////////////////
486 // parse_atom
487 /// INTERNAL ONLY
488 template<typename FwdIter>
489 detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
490 {
491 using namespace regex_constants;
492 escape_value esc = { 0, 0, 0, detail::escape_char };
493 FwdIter old_begin = begin;
494
495 switch(this->traits_.get_token(begin, end))
496 {
497 case token_literal:
498 return detail::make_literal_xpression<BidiIter>
499 (
500 this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
501 );
502
503 case token_any:
504 return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
505
506 case token_assert_begin_sequence:
507 return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
508
509 case token_assert_end_sequence:
510 return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
511
512 case token_assert_begin_line:
513 return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
514
515 case token_assert_end_line:
516 return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
517
518 case token_assert_word_boundary:
519 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
520
521 case token_assert_not_word_boundary:
522 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
523
524 case token_assert_word_begin:
525 return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
526
527 case token_assert_word_end:
528 return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
529
530 case token_escape:
531 esc = this->parse_escape(begin, end);
532 switch(esc.type_)
533 {
534 case detail::escape_mark:
535 return detail::make_backref_xpression<BidiIter>
536 (
537 esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
538 );
539 case detail::escape_char:
540 return detail::make_char_xpression<BidiIter>
541 (
542 esc.ch_, this->traits_.flags(), this->rxtraits()
543 );
544 case detail::escape_class:
545 return detail::make_posix_charset_xpression<BidiIter>
546 (
547 esc.class_
548 , this->is_upper_(*begin++)
549 , this->traits_.flags()
550 , this->rxtraits()
551 );
552 }
553
554 case token_group_begin:
555 return this->parse_group(begin, end);
556
557 case token_charset_begin:
558 return this->parse_charset(begin, end);
559
560 case token_invalid_quantifier:
561 BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
562 break;
563
564 case token_quote_meta_begin:
565 return detail::make_literal_xpression<BidiIter>
566 (
567 this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
568 );
569
570 case token_quote_meta_end:
571 BOOST_THROW_EXCEPTION(
572 regex_error(
573 error_escape
574 , "found quote-meta end without corresponding quote-meta begin"
575 )
576 );
577 break;
578
579 case token_end_of_pattern:
580 break;
581
582 default:
583 begin = old_begin;
584 break;
585 }
586
587 return detail::sequence<BidiIter>();
588 }
589
590 ///////////////////////////////////////////////////////////////////////////
591 // parse_quant
592 /// INTERNAL ONLY
593 template<typename FwdIter>
594 detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
595 {
596 BOOST_ASSERT(begin != end);
597 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
598 detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
599
600 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
601 if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
602 {
603 if(this->traits_.get_quant_spec(begin, end, spec))
604 {
605 BOOST_ASSERT(spec.min_ <= spec.max_);
606
607 if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
608 {
609 seq = this->parse_quant(begin, end);
610 }
611 else
612 {
613 seq.repeat(spec);
614 }
615 }
616 }
617
618 return seq;
619 }
620
621 ///////////////////////////////////////////////////////////////////////////
622 // parse_sequence
623 /// INTERNAL ONLY
624 template<typename FwdIter>
625 detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
626 {
627 detail::sequence<BidiIter> seq;
628
629 while(begin != end)
630 {
631 detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
632
633 // did we find a quantified atom?
634 if(seq_quant.empty())
635 break;
636
637 // chain it to the end of the xpression sequence
638 seq += seq_quant;
639 }
640
641 return seq;
642 }
643
644 ///////////////////////////////////////////////////////////////////////////
645 // parse_literal
646 // scan ahead looking for char literals to be globbed together into a string literal
647 /// INTERNAL ONLY
648 template<typename FwdIter>
649 string_type parse_literal(FwdIter &begin, FwdIter end)
650 {
651 using namespace regex_constants;
652 BOOST_ASSERT(begin != end);
653 BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
654 escape_value esc = { 0, 0, 0, detail::escape_char };
655 string_type literal(1, *begin);
656
657 for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
658 {
659 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
660 if(this->traits_.get_quant_spec(tmp, end, spec))
661 {
662 if(literal.size() != 1)
663 {
664 begin = prev;
665 literal.erase(boost::prior(literal.end()));
666 }
667 return literal;
668 }
669 else switch(this->traits_.get_token(tmp, end))
670 {
671 case token_escape:
672 esc = this->parse_escape(tmp, end);
673 if(detail::escape_char != esc.type_) return literal;
674 literal.insert(literal.end(), esc.ch_);
675 break;
676 case token_literal:
677 literal.insert(literal.end(), *tmp++);
678 break;
679 default:
680 return literal;
681 }
682 }
683
684 return literal;
685 }
686
687 ///////////////////////////////////////////////////////////////////////////
688 // parse_quote_meta
689 // scan ahead looking for char literals to be globbed together into a string literal
690 /// INTERNAL ONLY
691 template<typename FwdIter>
692 string_type parse_quote_meta(FwdIter &begin, FwdIter end)
693 {
694 using namespace regex_constants;
695 FwdIter old_begin = begin, old_end;
696 while(end != (old_end = begin))
697 {
698 switch(this->traits_.get_token(begin, end))
699 {
700 case token_quote_meta_end:
701 return string_type(old_begin, old_end);
702 case token_escape:
703 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
704 BOOST_FALLTHROUGH;
705 case token_invalid_quantifier:
706 case token_literal:
707 ++begin;
708 break;
709 default:
710 break;
711 }
712 }
713 return string_type(old_begin, begin);
714 }
715
716 ///////////////////////////////////////////////////////////////////////////////
717 // parse_escape
718 /// INTERNAL ONLY
719 template<typename FwdIter>
720 escape_value parse_escape(FwdIter &begin, FwdIter end)
721 {
722 BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
723
724 // first, check to see if this can be a backreference
725 if(0 < this->rxtraits().value(*begin, 10))
726 {
727 // Parse at most 3 decimal digits.
728 FwdIter tmp = begin;
729 int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
730
731 // If the resulting number could conceivably be a backref, then it is.
732 if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
733 {
734 begin = tmp;
735 escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
736 return esc;
737 }
738 }
739
740 // Not a backreference, defer to the parse_escape helper
741 return detail::parse_escape(begin, end, this->traits_);
742 }
743
744 bool is_upper_(char_type ch) const
745 {
746 return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
747 }
748
// Number of marks handed out so far in the current compilation; incremented
// by parse_group for each numbered or named group and zeroed by reset().
std::size_t mark_count_;
// Hidden mark counter; its address is passed along in every quant_spec, and
// it is zeroed by reset().
std::size_t hidden_mark_count_;
// Compiler traits: tokenizes the pattern and holds the current syntax flags
// and the regex traits object.
CompilerTraits traits_;
// Cached "upper" character class, looked up in the constructor and in imbue().
typename RegexTraits::char_class_type upper_;
// Implementation of the regex being compiled; set at the start of compile_()
// and released again at its end.
shared_ptr<detail::regex_impl<BidiIter> > self_;
// Named regular expressions (rules), keyed by name.
std::map<string_type, basic_regex<BidiIter> > rules_;
755};
756
757}} // namespace boost::xpressive
758
759#endif
760

// source code of boost/boost/xpressive/regex_compiler.hpp