// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use it with a grammar. This example has a
// heavily backtracking grammar, which makes it a candidate for lexer based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required), which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, making it possible to ignore several
// kinds of tokens.
//
// This example recognizes couplets, which are sequences of numbers enclosed
// in matching pairs of parentheses. See the comments below for details
// and examples.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"
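// Note: example.hpp is a local helper header shipped with these examples;
// it presumably provides the read_from_file() utility used in main() below.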

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";
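        // The backslashes are doubled for C++ string escaping: "\\.\\.\\."
        // is the regular expression \.\.\., matching the literal "...".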

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;
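        // The character literals '(' and ')' implicitly define single
        // character tokens whose token ids are the character codes, which
        // lets the grammar below match them with plain char literals.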

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        this->self("WS")
            = lex::token_def<>("[ \\t\\n]+")          // whitespace
            | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
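        // Tokens assigned to self("WS") belong to the separate lexer state
        // "WS"; the skipper used in main() switches to this state with
        // in_state("WS"), so these tokens are consumed as skipped input.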
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, number;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            = +(couplet | tok.ellipses)
            ;

        // A couplet matches nested left and right parentheses.
        // For example:
        //     (1) (1 2) (1 2 3) ...
        //     ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //     (((1))) ...
        couplet
            = tok.number
            | '(' >> +couplet >> ')'
            ;
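        // The '(' and ')' literals here match the single character tokens
        // registered by the lexer above. Because the input is tokenized
        // only once, backtracking out of +couplet merely rewinds the token
        // iterator instead of re-scanning the character stream.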

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;             // Our lexer
    example3_grammar calc(tokens);      // Our parser

    std::string str (read_from_file("example3.input"));
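    // The input file is expected to contain couplets and ellipses such as
    // "(1) (2 3) ... ((1 2)(3 4))" (see the grammar comments above); the
    // exact contents of example3.input are not reproduced here.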

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token set defined in the "WS" lexer state as
    // the skip parser: in_state("WS") temporarily switches the lexer
    // into that state, so whitespace and comments are skipped.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}


source code of boost/libs/spirit/example/lex/example3.cpp