1///////////////////////////////////////////////////////////////////////////////
2// toy_spirit.hpp
3//
4// Copyright 2008 Eric Niebler. Distributed under the Boost
5// Software License, Version 1.0. (See accompanying file
6// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8#include <cctype>
9#include <string>
10#include <cstring>
11#include <iostream>
12#include <boost/assert.hpp>
13#include <boost/mpl/assert.hpp>
14#include <boost/proto/core.hpp>
15#include <boost/proto/context.hpp>
16#include <boost/test/unit_test.hpp>
17
18namespace boost
19{
20 // global tags
21 struct char_tag {};
22 struct ichar_tag {};
23 struct istring_tag {};
24 struct ichar_range_tag {};
25 struct never_tag {};
26 struct always_tag {};
27 struct space_tag {};
28
29 // global primitives
30 proto::terminal<char_tag>::type const char_ = {.child0: {}};
31 proto::terminal<space_tag>::type const space = {.child0: {}};
32
33 using proto::lit;
34 using proto::literal;
35}
36
37namespace boost { namespace spirit2
38{
39
40 // handy typedefs
41 typedef proto::terminal<char_tag>::type anychar_p;
42 typedef proto::terminal<ichar_tag>::type ianychar_p;
43 typedef proto::terminal<istring_tag>::type ianystr_p;
44 typedef proto::terminal<ichar_range_tag>::type ianychar_range_p;
45 typedef proto::terminal<never_tag>::type never_p;
46 typedef proto::terminal<space_tag>::type space_p;
47
48 struct SpiritGrammar;
49 struct SkipperGrammar;
50 struct SpiritPrimitives;
51 template<typename Grammar>
52 struct SpiritComposites;
53
54 struct CharLiteral
55 : proto::terminal<char>
56 {};
57
58 struct NTBSLiteral
59 : proto::terminal<char const *>
60 {};
61
62 struct StdStringLiteral
63 : proto::terminal<std::string>
64 {};
65
66 struct CharParser
67 : proto::function<anychar_p, CharLiteral>
68 {};
69
70 struct ICharParser
71 : proto::function<ianychar_p, CharLiteral, CharLiteral>
72 {};
73
74 struct CharRangeParser
75 : proto::function<anychar_p, CharLiteral, CharLiteral>
76 {};
77
78 struct IStrParser
79 : proto::function<ianystr_p, StdStringLiteral>
80 {};
81
82 struct ICharRangeParser
83 : proto::function<ianychar_range_p, CharLiteral, CharLiteral>
84 {};
85
86 ianychar_p const ichar_ = {.child0: {}};
87 ianystr_p const istr_ = {.child0: {}};
88 ianychar_range_p const ichar_range_ = {.child0: {}};
89
90 namespace utility
91 {
92 inline bool char_icmp(char ch, char lo, char hi)
93 {
94 return ch == lo || ch == hi;
95 }
96
97 template<typename FwdIter>
98 inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end)
99 {
100 FwdIter tmp = begin;
101 for(; *sz; ++tmp, ++sz)
102 if(tmp == end || *tmp != *sz)
103 return false;
104 begin = tmp;
105 return true;
106 }
107
108 template<typename FwdIter>
109 inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end)
110 {
111 BOOST_ASSERT(0 == str.size() % 2);
112 FwdIter tmp = begin;
113 std::string::const_iterator istr = str.begin(), estr = str.end();
114 for(; istr != estr; ++tmp, istr += 2)
115 if(tmp == end || (*tmp != *istr && *tmp != *(istr+1)))
116 return false;
117 begin = tmp;
118 return true;
119 }
120
121 inline bool in_range(char ch, char lo, char hi)
122 {
123 return ch >= lo && ch <= hi;
124 }
125
126 inline bool in_irange(char ch, char lo, char hi)
127 {
128 return in_range(ch, lo, hi)
129 || in_range(ch: std::tolower(c: ch), lo, hi)
130 || in_range(ch: std::toupper(c: ch), lo, hi);
131 }
132
133 inline std::string to_istr(char const *sz)
134 {
135 std::string res;
136 res.reserve(res_arg: std::strlen(s: sz) * 2);
137 for(; *sz; ++sz)
138 {
139 res.push_back(c: std::tolower(c: *sz));
140 res.push_back(c: std::toupper(c: *sz));
141 }
142 return res;
143 }
144 } // namespace utility
145
146 template<typename FwdIter, typename Skipper = never_p>
147 struct spirit_context
148 : std::pair<FwdIter, FwdIter>
149 , proto::callable_context<spirit_context<FwdIter, Skipper> >
150 {
151 typedef bool result_type;
152 typedef FwdIter iterator;
153
154 spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper())
155 : std::pair<FwdIter, FwdIter>(first, second)
156 , skip_(skip)
157 , in_skip_(false)
158 {}
159
160 // parse function for anychar_p
161 bool operator()(proto::tag::terminal, char_tag)
162 {
163 this->skip();
164 if(this->first == this->second)
165 return false;
166 ++this->first;
167 return true;
168 }
169
170 // parse function for char_('a')
171 template<typename Expr>
172 bool operator()(proto::tag::function, anychar_p, Expr const &expr)
173 {
174 this->skip();
175 return proto::eval(expr, *this);
176 }
177
178 // parse function for space_p
179 bool operator()(proto::tag::terminal, space_tag)
180 {
181 this->skip();
182 if(this->first == this->second || !std::isspace(*this->first))
183 return false;
184 ++this->first;
185 return true;
186 }
187
188 // parse function for bare character literals
189 bool operator()(proto::tag::terminal, char ch)
190 {
191 this->skip();
192 if(this->first == this->second || *this->first != ch)
193 return false;
194 ++this->first;
195 return true;
196 }
197
198 // case-insensitive character parser
199 template<typename Arg1, typename Arg2>
200 bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2)
201 {
202 this->skip();
203 if(this->first == this->second
204 || !utility::char_icmp(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2)))
205 return false;
206 ++this->first;
207 return true;
208 }
209
210 // parse function for NTBS literals
211 bool operator()(proto::tag::terminal, char const *sz)
212 {
213 this->skip();
214 return utility::string_cmp(sz, this->first, this->second);
215 }
216
217 // parse function for istr_("hello")
218 template<typename Expr>
219 bool operator()(proto::tag::function, ianystr_p, Expr const &expr)
220 {
221 this->skip();
222 return utility::string_icmp(proto::value(expr), this->first, this->second);
223 }
224
225 // parse function for char_('a','z')
226 template<typename Arg1, typename Arg2>
227 bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2)
228 {
229 BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
230 this->skip();
231 if(this->first == this->second
232 || !utility::in_range(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2)))
233 return false;
234 ++this->first;
235 return true;
236 }
237
238 // parse function for ichar_range_('a','z')
239 template<typename Arg1, typename Arg2>
240 bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2)
241 {
242 BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
243 this->skip();
244 if(this->first == this->second
245 || !utility::in_irange(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2)))
246 return false;
247 ++this->first;
248 return true;
249 }
250
251 // parse function for complemented thingies (where thingies are assumed
252 // to be 1 character wide).
253 template<typename Expr>
254 bool operator()(proto::tag::complement, Expr const &expr)
255 {
256 this->skip();
257 iterator where = this->first;
258 if(proto::eval(expr, *this))
259 return this->first = where, false;
260 this->first = ++where;
261 return true;
262 }
263
264 // never_p parse function always returns false.
265 bool operator()(proto::tag::terminal, never_tag)
266 {
267 return false;
268 }
269
270 // for A >> B, succeeds if A and B matches.
271 template<typename Left, typename Right>
272 bool operator()(proto::tag::shift_right, Left const &left, Right const &right)
273 {
274 return proto::eval(left, *this) && proto::eval(right, *this);
275 }
276
277 // for A | B, succeeds if either A or B matches at this point.
278 template<typename Left, typename Right>
279 bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right)
280 {
281 iterator where = this->first;
282 return proto::eval(left, *this) || proto::eval(right, this->reset(where));
283 }
284
285 // for *A, greedily match A as many times as possible.
286 template<typename Expr>
287 bool operator()(proto::tag::dereference, Expr const &expr)
288 {
289 iterator where = this->first;
290 while(proto::eval(expr, *this))
291 where = this->first;
292 // make sure that when we return true, the iterator is at the correct position!
293 this->first = where;
294 return true;
295 }
296
297 // for +A, greedily match A one or more times.
298 template<typename Expr>
299 bool operator()(proto::tag::unary_plus, Expr const &expr)
300 {
301 return proto::eval(expr, *this) && proto::eval(*expr, *this);
302 }
303
304 // for !A, optionally match A.
305 template<typename Expr>
306 bool operator()(proto::tag::logical_not, Expr const &expr)
307 {
308 iterator where = this->first;
309 if(!proto::eval(expr, *this))
310 this->first = where;
311 return true;
312 }
313
314 // for (A - B), matches when A but not B matches.
315 template<typename Left, typename Right>
316 bool operator()(proto::tag::minus, Left const &left, Right const &right)
317 {
318 iterator where = this->first;
319 return !proto::eval(right, *this) && proto::eval(left, this->reset(where));
320 }
321 private:
322 spirit_context &reset(iterator where)
323 {
324 this->first = where;
325 return *this;
326 }
327
328 void skip()
329 {
330 if(!this->in_skip_)
331 {
332 this->in_skip_ = true;
333 while(proto::eval(this->skip_, *this))
334 {}
335 this->in_skip_ = false;
336 }
337 }
338
339 Skipper skip_;
340 bool in_skip_;
341 };
342
343 struct as_ichar_parser : proto::callable
344 {
345 typedef proto::function<
346 ianychar_p
347 , proto::terminal<char>::type
348 , proto::terminal<char>::type
349 >::type result_type;
350
351 template<typename Expr>
352 result_type operator()(Expr const &expr) const
353 {
354 char lo = std::tolower(proto::value(proto::child_c<1>(expr)));
355 char hi = std::toupper(proto::value(proto::child_c<1>(expr)));
356 result_type that = {.child0: ichar_, .child1: {.child0: lo}, .child2: {.child0: hi}};
357 return that;
358 }
359 };
360
361 struct as_ichar_range_parser : proto::callable
362 {
363 typedef proto::function<
364 ianychar_range_p
365 , proto::terminal<char>::type
366 , proto::terminal<char>::type
367 >::type result_type;
368
369 template<typename Expr>
370 result_type operator()(Expr const &expr) const
371 {
372 char lo = proto::value(proto::child_c<1>(expr));
373 char hi = proto::value(proto::child_c<2>(expr));
374 result_type that = {.child0: ichar_range_, .child1: {.child0: lo}, .child2: {.child0: hi}};
375 return that;
376 }
377 };
378
379 struct as_ichar_literal : proto::callable
380 {
381 typedef proto::function<
382 ianychar_p
383 , proto::terminal<char>::type
384 , proto::terminal<char>::type
385 >::type result_type;
386
387 template<typename Expr>
388 result_type operator()(Expr const &expr) const
389 {
390 char lo = std::tolower(proto::value(expr));
391 char hi = std::toupper(proto::value(expr));
392 result_type that = {.child0: ichar_, .child1: {.child0: lo}, .child2: {.child0: hi}};
393 return that;
394 }
395 };
396
397 struct as_intbs_literal : proto::callable
398 {
399 typedef proto::function<
400 ianystr_p
401 , proto::terminal<std::string>::type
402 >::type result_type;
403
404 template<typename Expr>
405 result_type operator()(Expr const &expr) const
406 {
407 result_type that = {istr_, {utility::to_istr(sz: proto::value(expr))}};
408 return that;
409 }
410 };
411
412 struct as_istdstring_literal : proto::callable
413 {
414 typedef proto::function<
415 ianystr_p
416 , proto::terminal<std::string>::type
417 >::type result_type;
418
419 template<typename Expr>
420 result_type operator()(Expr const &expr) const
421 {
422 result_type that = {istr_, {utility::to_istr(sz: proto::value(expr).c_str())}};
423 return that;
424 }
425 };
426
427 ///////////////////////////////////////////////////////////////////////////
428 // Transforms
429 ///////////////////////////////////////////////////////////////////////////
430
431 struct skip_primitives : proto::transform<skip_primitives>
432 {
433 template<typename Expr, typename State, typename Data>
434 struct impl : proto::transform_impl<Expr, State, Data>
435 {
436 typedef
437 typename proto::shift_right<
438 typename proto::dereference<State>::type
439 , Expr
440 >::type
441 result_type;
442
443 result_type operator ()(
444 typename impl::expr_param expr
445 , typename impl::state_param state
446 , typename impl::data_param data
447 ) const
448 {
449 result_type that = {{state}, expr};
450 return that;
451 }
452 };
453 };
454
455 ///////////////////////////////////////////////////////////////////////////
456 // Grammar
457 ///////////////////////////////////////////////////////////////////////////
458 using proto::_;
459
460 struct SpiritGrammar;
461
462 struct SpiritCaseSensitivePrimitives
463 : proto::or_<
464 proto::when<CharParser, as_ichar_parser(_)>
465 , proto::when<CharLiteral, as_ichar_literal(_)>
466 , proto::when<NTBSLiteral, as_intbs_literal(_)>
467 , proto::when<CharRangeParser, as_ichar_range_parser(_)>
468 , proto::when<StdStringLiteral, as_istdstring_literal(_)>
469 >
470 {};
471
472 struct SpiritCaseInsensitivePrimitives
473 : proto::or_<
474 anychar_p
475 , IStrParser
476 , ICharParser
477 , ICharRangeParser
478 , proto::complement<SpiritPrimitives>
479 >
480 {};
481
482 struct SpiritPrimitives
483 : proto::or_<
484 SpiritCaseSensitivePrimitives
485 , SpiritCaseInsensitivePrimitives
486 >
487 {};
488
489 template<typename Grammar>
490 struct SpiritComposites
491 : proto::or_<
492 proto::bitwise_or< Grammar, Grammar >
493 , proto::shift_right< Grammar, Grammar >
494 , proto::minus< Grammar, Grammar >
495 , proto::dereference< Grammar >
496 , proto::unary_plus< Grammar >
497 , proto::logical_not< Grammar >
498 >
499 {};
500
501 // Regular Spirit grammar, has no-case transforms
502 struct SpiritGrammar
503 : proto::or_<
504 SpiritComposites<SpiritGrammar>
505 , SpiritPrimitives
506 >
507 {};
508
509 // Spirit grammar with the skipper transform
510 struct SkipperGrammar
511 : proto::or_<
512 SpiritComposites<SkipperGrammar>
513 , proto::when<SpiritPrimitives, skip_primitives>
514 >
515 {};
516
517 ///////////////////////////////////////////////////////////////////////////
518 // Directives
519 ///////////////////////////////////////////////////////////////////////////
520
521 struct no_case_directive
522 {
523 template<typename Expr>
524 typename boost::result_of<SpiritGrammar(Expr const &)>::type const
525 operator [](Expr const &expr) const
526 {
527 return SpiritGrammar()(expr);
528 }
529 };
530
531 // no_case
532 no_case_directive const no_case = {};
533
534 template<typename Skipper>
535 struct skip_directive
536 {
537 skip_directive(Skipper const &skip)
538 : skip_(skip)
539 {}
540
541 template<typename Expr>
542 typename boost::result_of<SkipperGrammar(Expr const &, Skipper const &)>::type const
543 operator [](Expr const &expr) const
544 {
545 return SkipperGrammar()(expr, this->skip_);
546 }
547 private:
548 Skipper skip_;
549 };
550
551 // skip
552 template<typename Skipper>
553 skip_directive<Skipper> skip(Skipper const &skip)
554 {
555 return skip_directive<Skipper>(skip);
556 }
557
558 ///////////////////////////////////////////////////////////////////////////
559 // parse
560 ///////////////////////////////////////////////////////////////////////////
561
562 template<typename FwdIter, typename Rule>
563 bool parse(FwdIter begin, FwdIter end, Rule const &rule)
564 {
565 // make sure the rule corresponds to the Spirit grammar:
566 BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
567
568 spirit_context<FwdIter> ctx(begin, end);
569 return proto::eval(rule, ctx);
570 }
571
572 // parse with a skip parser can be implemented in one of two ways:
573 // Method 1)
574 // The skip parser is passed to all the parsers which invoke it
575 // before they invoke themselves. This is how Spirit-1 does it,
576 // and it is the cause of the Scanner Business. However, it has
577 // the advantage of not needing a parser transformation phase.
578 // Method 2)
579 // Transform the expression template to insert the skip parser
580 // in between all sequenced parsers. That is, transform (A >> B)
581 // to (*skip >> A >> *skip >> B). This has the advantage of making
582 // it unnecessary to pass the scanner to all the parsers, which
583 // means its type doesn't show up in function signatures, avoiding
584 // the Scanner Business.
585 // Recommendation:
586 // Both methods should be supported. Method 1 should be preferred
587 // when calling parse with parsers defined inline. Method 2 should
588 // be preferred when a parser expression is assigned to a rule<>,
589 // thereby making the type of the rule<> independent of the skip
590 // parser used. I imagine a syntax like:
591 // rule<> r = skip(space)[A >> B >> C]
592 template<typename FwdIter, typename Rule, typename Skipper>
593 bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper)
594 {
595 // make sure the rule corresponds to the Spirit grammar:
596 BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
597
598 //// Method 1: pass skip parser in the context structure.
599 //spirit_context<FwdIter, Skipper> ctx(begin, end, skipper);
600 //return proto::eval(rule, ctx);
601
602 // Method 2: Embed skip parser via tree transformation.
603 spirit_context<FwdIter> ctx(begin, end);
604 return proto::eval(spirit2::skip(skipper)[rule], ctx);
605 }
606
607}}
608
609using namespace boost;
610using namespace spirit2;
611
612void test_toy_spirit()
613{
614 std::string str("abcd");
615
616 // This will fail:
617 BOOST_CHECK(!spirit2::parse(str.begin(), str.end()
618 , char_ >> char_('a')));
619
620 // This will succeed:
621 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
622 , char_ >> char_('b') >> char_ >> 'd'));
623
624 // This will succeed:
625 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
626 , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd'));
627
628 // This will succeed:
629 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
630 , *(char_ - 'd')));
631
632 // This will succeed:
633 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
634 , no_case[char_('A') >> 'B' >> "CD"]));
635
636 // This will succeed:
637 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
638 , no_case[*char_('A','Z')]));
639
640 literal<char> a = lit(t: 'a');
641 literal<char const *> bcd = lit(t: "bcd");
642
643 // This will succeed:
644 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
645 , +~~a >> no_case[bcd]));
646
647 // Scanner Business: R.I.P. :-)
648 str = "a b cd";
649 BOOST_CHECK(spirit2::parse(str.begin(), str.end()
650 , char_('a') >> 'b' >> 'c' >> 'd', space >> space));
651
652}
653
654using namespace boost::unit_test;
655///////////////////////////////////////////////////////////////////////////////
656// init_unit_test_suite
657//
658test_suite* init_unit_test_suite( int argc, char* argv[] )
659{
660 test_suite *test = BOOST_TEST_SUITE("test proto and and toy spirit-2");
661
662 test->add(BOOST_TEST_CASE(&test_toy_spirit));
663
664 return test;
665}
666

source code of boost/libs/proto/test/toy_spirit.cpp