1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // toy_spirit.hpp |
3 | // |
4 | // Copyright 2008 Eric Niebler. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #include <cctype> |
9 | #include <string> |
10 | #include <cstring> |
11 | #include <iostream> |
12 | #include <boost/assert.hpp> |
13 | #include <boost/mpl/assert.hpp> |
14 | #include <boost/proto/core.hpp> |
15 | #include <boost/proto/context.hpp> |
16 | #include <boost/test/unit_test.hpp> |
17 | |
18 | namespace boost |
19 | { |
20 | // global tags |
21 | struct char_tag {}; |
22 | struct ichar_tag {}; |
23 | struct istring_tag {}; |
24 | struct ichar_range_tag {}; |
25 | struct never_tag {}; |
26 | struct always_tag {}; |
27 | struct space_tag {}; |
28 | |
29 | // global primitives |
30 | proto::terminal<char_tag>::type const char_ = {.child0: {}}; |
31 | proto::terminal<space_tag>::type const space = {.child0: {}}; |
32 | |
33 | using proto::lit; |
34 | using proto::literal; |
35 | } |
36 | |
37 | namespace boost { namespace spirit2 |
38 | { |
39 | |
40 | // handy typedefs |
41 | typedef proto::terminal<char_tag>::type anychar_p; |
42 | typedef proto::terminal<ichar_tag>::type ianychar_p; |
43 | typedef proto::terminal<istring_tag>::type ianystr_p; |
44 | typedef proto::terminal<ichar_range_tag>::type ianychar_range_p; |
45 | typedef proto::terminal<never_tag>::type never_p; |
46 | typedef proto::terminal<space_tag>::type space_p; |
47 | |
48 | struct SpiritGrammar; |
49 | struct SkipperGrammar; |
50 | struct SpiritPrimitives; |
51 | template<typename Grammar> |
52 | struct SpiritComposites; |
53 | |
54 | struct CharLiteral |
55 | : proto::terminal<char> |
56 | {}; |
57 | |
58 | struct NTBSLiteral |
59 | : proto::terminal<char const *> |
60 | {}; |
61 | |
62 | struct StdStringLiteral |
63 | : proto::terminal<std::string> |
64 | {}; |
65 | |
66 | struct CharParser |
67 | : proto::function<anychar_p, CharLiteral> |
68 | {}; |
69 | |
70 | struct ICharParser |
71 | : proto::function<ianychar_p, CharLiteral, CharLiteral> |
72 | {}; |
73 | |
74 | struct CharRangeParser |
75 | : proto::function<anychar_p, CharLiteral, CharLiteral> |
76 | {}; |
77 | |
78 | struct IStrParser |
79 | : proto::function<ianystr_p, StdStringLiteral> |
80 | {}; |
81 | |
82 | struct ICharRangeParser |
83 | : proto::function<ianychar_range_p, CharLiteral, CharLiteral> |
84 | {}; |
85 | |
86 | ianychar_p const ichar_ = {.child0: {}}; |
87 | ianystr_p const istr_ = {.child0: {}}; |
88 | ianychar_range_p const ichar_range_ = {.child0: {}}; |
89 | |
90 | namespace utility |
91 | { |
92 | inline bool char_icmp(char ch, char lo, char hi) |
93 | { |
94 | return ch == lo || ch == hi; |
95 | } |
96 | |
97 | template<typename FwdIter> |
98 | inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end) |
99 | { |
100 | FwdIter tmp = begin; |
101 | for(; *sz; ++tmp, ++sz) |
102 | if(tmp == end || *tmp != *sz) |
103 | return false; |
104 | begin = tmp; |
105 | return true; |
106 | } |
107 | |
108 | template<typename FwdIter> |
109 | inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end) |
110 | { |
111 | BOOST_ASSERT(0 == str.size() % 2); |
112 | FwdIter tmp = begin; |
113 | std::string::const_iterator istr = str.begin(), estr = str.end(); |
114 | for(; istr != estr; ++tmp, istr += 2) |
115 | if(tmp == end || (*tmp != *istr && *tmp != *(istr+1))) |
116 | return false; |
117 | begin = tmp; |
118 | return true; |
119 | } |
120 | |
121 | inline bool in_range(char ch, char lo, char hi) |
122 | { |
123 | return ch >= lo && ch <= hi; |
124 | } |
125 | |
126 | inline bool in_irange(char ch, char lo, char hi) |
127 | { |
128 | return in_range(ch, lo, hi) |
129 | || in_range(ch: std::tolower(c: ch), lo, hi) |
130 | || in_range(ch: std::toupper(c: ch), lo, hi); |
131 | } |
132 | |
133 | inline std::string to_istr(char const *sz) |
134 | { |
135 | std::string res; |
136 | res.reserve(res_arg: std::strlen(s: sz) * 2); |
137 | for(; *sz; ++sz) |
138 | { |
139 | res.push_back(c: std::tolower(c: *sz)); |
140 | res.push_back(c: std::toupper(c: *sz)); |
141 | } |
142 | return res; |
143 | } |
144 | } // namespace utility |
145 | |
146 | template<typename FwdIter, typename Skipper = never_p> |
147 | struct spirit_context |
148 | : std::pair<FwdIter, FwdIter> |
149 | , proto::callable_context<spirit_context<FwdIter, Skipper> > |
150 | { |
151 | typedef bool result_type; |
152 | typedef FwdIter iterator; |
153 | |
154 | spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper()) |
155 | : std::pair<FwdIter, FwdIter>(first, second) |
156 | , skip_(skip) |
157 | , in_skip_(false) |
158 | {} |
159 | |
160 | // parse function for anychar_p |
161 | bool operator()(proto::tag::terminal, char_tag) |
162 | { |
163 | this->skip(); |
164 | if(this->first == this->second) |
165 | return false; |
166 | ++this->first; |
167 | return true; |
168 | } |
169 | |
170 | // parse function for char_('a') |
171 | template<typename Expr> |
172 | bool operator()(proto::tag::function, anychar_p, Expr const &expr) |
173 | { |
174 | this->skip(); |
175 | return proto::eval(expr, *this); |
176 | } |
177 | |
178 | // parse function for space_p |
179 | bool operator()(proto::tag::terminal, space_tag) |
180 | { |
181 | this->skip(); |
182 | if(this->first == this->second || !std::isspace(*this->first)) |
183 | return false; |
184 | ++this->first; |
185 | return true; |
186 | } |
187 | |
188 | // parse function for bare character literals |
189 | bool operator()(proto::tag::terminal, char ch) |
190 | { |
191 | this->skip(); |
192 | if(this->first == this->second || *this->first != ch) |
193 | return false; |
194 | ++this->first; |
195 | return true; |
196 | } |
197 | |
198 | // case-insensitive character parser |
199 | template<typename Arg1, typename Arg2> |
200 | bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2) |
201 | { |
202 | this->skip(); |
203 | if(this->first == this->second |
204 | || !utility::char_icmp(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2))) |
205 | return false; |
206 | ++this->first; |
207 | return true; |
208 | } |
209 | |
210 | // parse function for NTBS literals |
211 | bool operator()(proto::tag::terminal, char const *sz) |
212 | { |
213 | this->skip(); |
214 | return utility::string_cmp(sz, this->first, this->second); |
215 | } |
216 | |
217 | // parse function for istr_("hello") |
218 | template<typename Expr> |
219 | bool operator()(proto::tag::function, ianystr_p, Expr const &expr) |
220 | { |
221 | this->skip(); |
222 | return utility::string_icmp(proto::value(expr), this->first, this->second); |
223 | } |
224 | |
225 | // parse function for char_('a','z') |
226 | template<typename Arg1, typename Arg2> |
227 | bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2) |
228 | { |
229 | BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2)); |
230 | this->skip(); |
231 | if(this->first == this->second |
232 | || !utility::in_range(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2))) |
233 | return false; |
234 | ++this->first; |
235 | return true; |
236 | } |
237 | |
238 | // parse function for ichar_range_('a','z') |
239 | template<typename Arg1, typename Arg2> |
240 | bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2) |
241 | { |
242 | BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2)); |
243 | this->skip(); |
244 | if(this->first == this->second |
245 | || !utility::in_irange(ch: *this->first, lo: proto::value(arg1), hi: proto::value(arg2))) |
246 | return false; |
247 | ++this->first; |
248 | return true; |
249 | } |
250 | |
251 | // parse function for complemented thingies (where thingies are assumed |
252 | // to be 1 character wide). |
253 | template<typename Expr> |
254 | bool operator()(proto::tag::complement, Expr const &expr) |
255 | { |
256 | this->skip(); |
257 | iterator where = this->first; |
258 | if(proto::eval(expr, *this)) |
259 | return this->first = where, false; |
260 | this->first = ++where; |
261 | return true; |
262 | } |
263 | |
264 | // never_p parse function always returns false. |
265 | bool operator()(proto::tag::terminal, never_tag) |
266 | { |
267 | return false; |
268 | } |
269 | |
270 | // for A >> B, succeeds if A and B matches. |
271 | template<typename Left, typename Right> |
272 | bool operator()(proto::tag::shift_right, Left const &left, Right const &right) |
273 | { |
274 | return proto::eval(left, *this) && proto::eval(right, *this); |
275 | } |
276 | |
277 | // for A | B, succeeds if either A or B matches at this point. |
278 | template<typename Left, typename Right> |
279 | bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right) |
280 | { |
281 | iterator where = this->first; |
282 | return proto::eval(left, *this) || proto::eval(right, this->reset(where)); |
283 | } |
284 | |
285 | // for *A, greedily match A as many times as possible. |
286 | template<typename Expr> |
287 | bool operator()(proto::tag::dereference, Expr const &expr) |
288 | { |
289 | iterator where = this->first; |
290 | while(proto::eval(expr, *this)) |
291 | where = this->first; |
292 | // make sure that when we return true, the iterator is at the correct position! |
293 | this->first = where; |
294 | return true; |
295 | } |
296 | |
297 | // for +A, greedily match A one or more times. |
298 | template<typename Expr> |
299 | bool operator()(proto::tag::unary_plus, Expr const &expr) |
300 | { |
301 | return proto::eval(expr, *this) && proto::eval(*expr, *this); |
302 | } |
303 | |
304 | // for !A, optionally match A. |
305 | template<typename Expr> |
306 | bool operator()(proto::tag::logical_not, Expr const &expr) |
307 | { |
308 | iterator where = this->first; |
309 | if(!proto::eval(expr, *this)) |
310 | this->first = where; |
311 | return true; |
312 | } |
313 | |
314 | // for (A - B), matches when A but not B matches. |
315 | template<typename Left, typename Right> |
316 | bool operator()(proto::tag::minus, Left const &left, Right const &right) |
317 | { |
318 | iterator where = this->first; |
319 | return !proto::eval(right, *this) && proto::eval(left, this->reset(where)); |
320 | } |
321 | private: |
322 | spirit_context &reset(iterator where) |
323 | { |
324 | this->first = where; |
325 | return *this; |
326 | } |
327 | |
328 | void skip() |
329 | { |
330 | if(!this->in_skip_) |
331 | { |
332 | this->in_skip_ = true; |
333 | while(proto::eval(this->skip_, *this)) |
334 | {} |
335 | this->in_skip_ = false; |
336 | } |
337 | } |
338 | |
339 | Skipper skip_; |
340 | bool in_skip_; |
341 | }; |
342 | |
343 | struct as_ichar_parser : proto::callable |
344 | { |
345 | typedef proto::function< |
346 | ianychar_p |
347 | , proto::terminal<char>::type |
348 | , proto::terminal<char>::type |
349 | >::type result_type; |
350 | |
351 | template<typename Expr> |
352 | result_type operator()(Expr const &expr) const |
353 | { |
354 | char lo = std::tolower(proto::value(proto::child_c<1>(expr))); |
355 | char hi = std::toupper(proto::value(proto::child_c<1>(expr))); |
356 | result_type that = {.child0: ichar_, .child1: {.child0: lo}, .child2: {.child0: hi}}; |
357 | return that; |
358 | } |
359 | }; |
360 | |
361 | struct as_ichar_range_parser : proto::callable |
362 | { |
363 | typedef proto::function< |
364 | ianychar_range_p |
365 | , proto::terminal<char>::type |
366 | , proto::terminal<char>::type |
367 | >::type result_type; |
368 | |
369 | template<typename Expr> |
370 | result_type operator()(Expr const &expr) const |
371 | { |
372 | char lo = proto::value(proto::child_c<1>(expr)); |
373 | char hi = proto::value(proto::child_c<2>(expr)); |
374 | result_type that = {.child0: ichar_range_, .child1: {.child0: lo}, .child2: {.child0: hi}}; |
375 | return that; |
376 | } |
377 | }; |
378 | |
379 | struct as_ichar_literal : proto::callable |
380 | { |
381 | typedef proto::function< |
382 | ianychar_p |
383 | , proto::terminal<char>::type |
384 | , proto::terminal<char>::type |
385 | >::type result_type; |
386 | |
387 | template<typename Expr> |
388 | result_type operator()(Expr const &expr) const |
389 | { |
390 | char lo = std::tolower(proto::value(expr)); |
391 | char hi = std::toupper(proto::value(expr)); |
392 | result_type that = {.child0: ichar_, .child1: {.child0: lo}, .child2: {.child0: hi}}; |
393 | return that; |
394 | } |
395 | }; |
396 | |
397 | struct as_intbs_literal : proto::callable |
398 | { |
399 | typedef proto::function< |
400 | ianystr_p |
401 | , proto::terminal<std::string>::type |
402 | >::type result_type; |
403 | |
404 | template<typename Expr> |
405 | result_type operator()(Expr const &expr) const |
406 | { |
407 | result_type that = {istr_, {utility::to_istr(sz: proto::value(expr))}}; |
408 | return that; |
409 | } |
410 | }; |
411 | |
412 | struct as_istdstring_literal : proto::callable |
413 | { |
414 | typedef proto::function< |
415 | ianystr_p |
416 | , proto::terminal<std::string>::type |
417 | >::type result_type; |
418 | |
419 | template<typename Expr> |
420 | result_type operator()(Expr const &expr) const |
421 | { |
422 | result_type that = {istr_, {utility::to_istr(sz: proto::value(expr).c_str())}}; |
423 | return that; |
424 | } |
425 | }; |
426 | |
427 | /////////////////////////////////////////////////////////////////////////// |
428 | // Transforms |
429 | /////////////////////////////////////////////////////////////////////////// |
430 | |
431 | struct skip_primitives : proto::transform<skip_primitives> |
432 | { |
433 | template<typename Expr, typename State, typename Data> |
434 | struct impl : proto::transform_impl<Expr, State, Data> |
435 | { |
436 | typedef |
437 | typename proto::shift_right< |
438 | typename proto::dereference<State>::type |
439 | , Expr |
440 | >::type |
441 | result_type; |
442 | |
443 | result_type operator ()( |
444 | typename impl::expr_param expr |
445 | , typename impl::state_param state |
446 | , typename impl::data_param data |
447 | ) const |
448 | { |
449 | result_type that = {{state}, expr}; |
450 | return that; |
451 | } |
452 | }; |
453 | }; |
454 | |
455 | /////////////////////////////////////////////////////////////////////////// |
456 | // Grammar |
457 | /////////////////////////////////////////////////////////////////////////// |
458 | using proto::_; |
459 | |
460 | struct SpiritGrammar; |
461 | |
462 | struct SpiritCaseSensitivePrimitives |
463 | : proto::or_< |
464 | proto::when<CharParser, as_ichar_parser(_)> |
465 | , proto::when<CharLiteral, as_ichar_literal(_)> |
466 | , proto::when<NTBSLiteral, as_intbs_literal(_)> |
467 | , proto::when<CharRangeParser, as_ichar_range_parser(_)> |
468 | , proto::when<StdStringLiteral, as_istdstring_literal(_)> |
469 | > |
470 | {}; |
471 | |
472 | struct SpiritCaseInsensitivePrimitives |
473 | : proto::or_< |
474 | anychar_p |
475 | , IStrParser |
476 | , ICharParser |
477 | , ICharRangeParser |
478 | , proto::complement<SpiritPrimitives> |
479 | > |
480 | {}; |
481 | |
482 | struct SpiritPrimitives |
483 | : proto::or_< |
484 | SpiritCaseSensitivePrimitives |
485 | , SpiritCaseInsensitivePrimitives |
486 | > |
487 | {}; |
488 | |
489 | template<typename Grammar> |
490 | struct SpiritComposites |
491 | : proto::or_< |
492 | proto::bitwise_or< Grammar, Grammar > |
493 | , proto::shift_right< Grammar, Grammar > |
494 | , proto::minus< Grammar, Grammar > |
495 | , proto::dereference< Grammar > |
496 | , proto::unary_plus< Grammar > |
497 | , proto::logical_not< Grammar > |
498 | > |
499 | {}; |
500 | |
501 | // Regular Spirit grammar, has no-case transforms |
502 | struct SpiritGrammar |
503 | : proto::or_< |
504 | SpiritComposites<SpiritGrammar> |
505 | , SpiritPrimitives |
506 | > |
507 | {}; |
508 | |
509 | // Spirit grammar with the skipper transform |
510 | struct SkipperGrammar |
511 | : proto::or_< |
512 | SpiritComposites<SkipperGrammar> |
513 | , proto::when<SpiritPrimitives, skip_primitives> |
514 | > |
515 | {}; |
516 | |
517 | /////////////////////////////////////////////////////////////////////////// |
518 | // Directives |
519 | /////////////////////////////////////////////////////////////////////////// |
520 | |
521 | struct no_case_directive |
522 | { |
523 | template<typename Expr> |
524 | typename boost::result_of<SpiritGrammar(Expr const &)>::type const |
525 | operator [](Expr const &expr) const |
526 | { |
527 | return SpiritGrammar()(expr); |
528 | } |
529 | }; |
530 | |
531 | // no_case |
532 | no_case_directive const no_case = {}; |
533 | |
534 | template<typename Skipper> |
535 | struct skip_directive |
536 | { |
537 | skip_directive(Skipper const &skip) |
538 | : skip_(skip) |
539 | {} |
540 | |
541 | template<typename Expr> |
542 | typename boost::result_of<SkipperGrammar(Expr const &, Skipper const &)>::type const |
543 | operator [](Expr const &expr) const |
544 | { |
545 | return SkipperGrammar()(expr, this->skip_); |
546 | } |
547 | private: |
548 | Skipper skip_; |
549 | }; |
550 | |
551 | // skip |
552 | template<typename Skipper> |
553 | skip_directive<Skipper> skip(Skipper const &skip) |
554 | { |
555 | return skip_directive<Skipper>(skip); |
556 | } |
557 | |
558 | /////////////////////////////////////////////////////////////////////////// |
559 | // parse |
560 | /////////////////////////////////////////////////////////////////////////// |
561 | |
562 | template<typename FwdIter, typename Rule> |
563 | bool parse(FwdIter begin, FwdIter end, Rule const &rule) |
564 | { |
565 | // make sure the rule corresponds to the Spirit grammar: |
566 | BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>)); |
567 | |
568 | spirit_context<FwdIter> ctx(begin, end); |
569 | return proto::eval(rule, ctx); |
570 | } |
571 | |
572 | // parse with a skip parser can be implemented in one of two ways: |
573 | // Method 1) |
574 | // The skip parser is passed to all the parsers which invoke it |
575 | // before they invoke themselves. This is how Spirit-1 does it, |
576 | // and it is the cause of the Scanner Business. However, it has |
577 | // the advantage of not needing a parser transformation phase. |
578 | // Method 2) |
579 | // Transform the expression template to insert the skip parser |
580 | // in between all sequenced parsers. That is, transform (A >> B) |
581 | // to (*skip >> A >> *skip >> B). This has the advantage of making |
582 | // it unnecessary to pass the scanner to all the parsers, which |
583 | // means its type doesn't show up in function signatures, avoiding |
584 | // the Scanner Business. |
585 | // Recommendation: |
586 | // Both methods should be supported. Method 1 should be preferred |
587 | // when calling parse with parsers defined inline. Method 2 should |
588 | // be preferred when a parser expression is assigned to a rule<>, |
589 | // thereby making the type of the rule<> independent of the skip |
590 | // parser used. I imagine a syntax like: |
591 | // rule<> r = skip(space)[A >> B >> C] |
592 | template<typename FwdIter, typename Rule, typename Skipper> |
593 | bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper) |
594 | { |
595 | // make sure the rule corresponds to the Spirit grammar: |
596 | BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>)); |
597 | |
598 | //// Method 1: pass skip parser in the context structure. |
599 | //spirit_context<FwdIter, Skipper> ctx(begin, end, skipper); |
600 | //return proto::eval(rule, ctx); |
601 | |
602 | // Method 2: Embed skip parser via tree transformation. |
603 | spirit_context<FwdIter> ctx(begin, end); |
604 | return proto::eval(spirit2::skip(skipper)[rule], ctx); |
605 | } |
606 | |
607 | }} |
608 | |
609 | using namespace boost; |
610 | using namespace spirit2; |
611 | |
612 | void test_toy_spirit() |
613 | { |
614 | std::string str("abcd" ); |
615 | |
616 | // This will fail: |
617 | BOOST_CHECK(!spirit2::parse(str.begin(), str.end() |
618 | , char_ >> char_('a'))); |
619 | |
620 | // This will succeed: |
621 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
622 | , char_ >> char_('b') >> char_ >> 'd')); |
623 | |
624 | // This will succeed: |
625 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
626 | , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd')); |
627 | |
628 | // This will succeed: |
629 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
630 | , *(char_ - 'd'))); |
631 | |
632 | // This will succeed: |
633 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
634 | , no_case[char_('A') >> 'B' >> "CD" ])); |
635 | |
636 | // This will succeed: |
637 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
638 | , no_case[*char_('A','Z')])); |
639 | |
640 | literal<char> a = lit(t: 'a'); |
641 | literal<char const *> bcd = lit(t: "bcd" ); |
642 | |
643 | // This will succeed: |
644 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
645 | , +~~a >> no_case[bcd])); |
646 | |
647 | // Scanner Business: R.I.P. :-) |
648 | str = "a b cd" ; |
649 | BOOST_CHECK(spirit2::parse(str.begin(), str.end() |
650 | , char_('a') >> 'b' >> 'c' >> 'd', space >> space)); |
651 | |
652 | } |
653 | |
654 | using namespace boost::unit_test; |
655 | /////////////////////////////////////////////////////////////////////////////// |
656 | // init_unit_test_suite |
657 | // |
658 | test_suite* init_unit_test_suite( int argc, char* argv[] ) |
659 | { |
660 | test_suite *test = BOOST_TEST_SUITE("test proto and and toy spirit-2" ); |
661 | |
662 | test->add(BOOST_TEST_CASE(&test_toy_spirit)); |
663 | |
664 | return test; |
665 | } |
666 | |