// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent of the following lex program:
//
//     %{
//     /* INITIAL is the default start state. COMMENT is our new */
//     /* state, where we remove comments.                       */
//     %}
//
//     %s COMMENT
//     %%
//     <INITIAL>"//".*    ;
//     <INITIAL>"/*"      BEGIN COMMENT;
//     <INITIAL>.         ECHO;
//     <INITIAL>[\n]      ECHO;
//     <COMMENT>"*/"      BEGIN INITIAL;
//     <COMMENT>.         ;
//     <COMMENT>[\n]      ;
//     %%
//
//     main()
//     {
//         yylex();
//     }
//
// Its purpose is to strip comments out of C code.
//
// Additionally, this example demonstrates the use of lexer states to structure
// the lexer definition.
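//
// For instance (an illustrative example, not taken from the program's input
// file), an input line such as
//
//     int i = 0; /* initialise */ i += 1; // done
//
// would be echoed roughly as
//
//     int i = 0;  i += 1;
//
// i.e. both comment styles are removed while all other characters (including
// the whitespace surrounding the comments) are copied to standard output
// unchanged.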

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/stl/container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
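
// Note (explanatory; based on the Spirit.Lex documentation): token definitions
// added without an explicit id, such as cppcomment and ccomment below, get
// their ids assigned automatically starting at lex::min_token_id. The offset
// of 10 used for IDANY simply keeps it clear of those automatically assigned
// ids.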

template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // define tokens and associate them with the lexer
        cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
        ccomment = "\"/*\"";            // '/*'
        endcomment = "\"*/\"";          // '*/'
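        // Note (explanatory): in the lexertl regular expression syntax,
        // characters enclosed in double quotes are matched literally (as in
        // flex), so the extra escaped quotes in the C++ string literals above
        // turn "//", "/*" and "*/" into literal text rather than regex
        // operators.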

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        this->self.add
            (cppcomment)    // no explicit token id is associated
            (ccomment)
            (".", IDANY)    // IDANY is the token id associated with this token
                            // definition
        ;

        // The following tokens are associated with the lexer state "COMMENT".
        // We switch lexer states from inside the parsing process using the
        // in_state("COMMENT")[] parser component as shown below.
        this->self("COMMENT").add
            (endcomment)
            (".", IDANY)
        ;
    }

    lex::token_def<> cppcomment, ccomment, endcomment;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct strip_comments_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    strip_comments_grammar(TokenDef const& tok)
      : strip_comments_grammar::base_type(start)
    {
        // The in_state("COMMENT")[...] parser component switches the lexer
        // state to be 'COMMENT' during the matching of the embedded parser.
        start = *(   tok.ccomment
                     >> qi::in_state("COMMENT")
                        [
                            // the lexer is in the 'COMMENT' state during
                            // matching of the following parser components
                            *qi::token(IDANY) >> tok.endcomment
                        ]
                 |   tok.cppcomment
                 |   qi::token(IDANY) [ std::cout << _1 ]
                 )
            ;
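
        // Note (explanatory): only the last alternative carries a semantic
        // action (std::cout << _1), so only characters matched outside of
        // comments are echoed; the comment tokens themselves are consumed
        // without producing any output.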
    }

    qi::rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef
        lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
        lexer_type;

    // iterator type exposed by the lexer
    typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    strip_comments_tokens<lexer_type> strip_comments;          // Our lexer
    strip_comments_grammar<iterator_type> g (strip_comments);  // Our parser

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();

    bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
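
    // Note (explanatory sketch, not part of the original program): the call
    // above combines lexing and parsing in a single step. Written out by
    // hand it would look roughly like this, assuming the usual Spirit.Lex
    // begin()/end() interface:
    //
    //     iterator_type it  = strip_comments.begin(first, str.end());
    //     iterator_type end = strip_comments.end();
    //     bool r = qi::parse(it, end, g);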

    if (r) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else {
        std::string rest(first, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
Source: boost/libs/spirit/example/lex/strip_comments.cpp