// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word   [^ \t\n]+
    eol    \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to replicate the word counting functionality of the UNIX
// 'wc' command: it prints the number of lines, words, and characters in a
// file.
//
// The example additionally demonstrates how to use the add_pattern(...)(...)
// syntax to define lexer patterns. These patterns are essentially
// parameterless 'macros' for regular expressions, which simplify the
// definition of the tokens referring to them.
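//
// For illustration, several patterns could be registered in one chained
// statement; the 'EOL' pattern below is a hypothetical addition, not part
// of this example:
//
//     this->self.add_pattern
//         ("WORD", "[^ \t\n]+")
//         ("EOL", "\n")
//     ;
//
// Any token definition may then reference a pattern by name, as in
// word = "{WORD}"; below.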

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/statement.hpp>
#include <boost/phoenix/stl/container.hpp>
//]
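// The phoenix headers above supply the building blocks used in the semantic
// actions of the grammar below: the overloaded operators (++, +=, and the
// comma) and the size() function applied to container attributes.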

#include <iostream>
#include <string>

#include "example.hpp"

//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::ascii;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]
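// A note on the id value: token definitions added without an explicit id
// (such as 'word' below) are assigned ids automatically, starting at
// lex::min_token_id, while single-character tokens (such as '\n') use their
// character code as id. Starting the user-defined ids at min_token_id + 10
// leaves room for the automatically assigned ones.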

//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // string literals will not be escaped by the library
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    lex::token_def<std::string> word;
};
//]
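// Had 'word' been declared without an attribute type, i.e. (hypothetically):
//
//     lex::token_def<> word;
//
// the parser would have seen the default token attribute, a pair of
// iterators delimiting the matched input, instead of a std::string.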

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        start =  *(   tok.word          [++ref(w), ref(c) += size(_1)]
                  |   lit('\n')         [++ref(c), ++ref(l)]
                  |   qi::token(IDANY)  [++ref(c)]
                  )
              ;
    }

    std::size_t c, w, l;
    qi::rule<Iterator> start;
};
//]
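// In the semantic actions above, _1 refers to the attribute of the matched
// token: for tok.word this is the std::string holding the matched text, so
// size(_1) adds the word's length to the character count. The counters live
// in the grammar object itself and are read out in main() after parsing.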

///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*< Define the token type to be used: `std::string` is available as the
    type of the token attribute
>*/  typedef lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*< Define the lexer type to use implementing the state machine
>*/  typedef lex::lexertl::lexer<token_type> lexer_type;

/*< Define the iterator type exposed by the lexer type
>*/  typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // Our lexer
    word_count_grammar<iterator_type> g (word_count);  // Our parser

    // read the contents of the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*< Parsing is done based on the token stream, not the character
    stream read from the input. The function `tokenize_and_parse()` wraps
    the passed iterator range `[first, last)` with the lexical analyzer and
    uses its exposed iterators to parse the token stream.
>*/  bool r = lex::tokenize_and_parse(first, last, word_count, g);

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
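
// Usage sketch (the exact compiler invocation depends on your Boost
// installation; 'some_file.txt' is a placeholder):
//
//     $ g++ -I /path/to/boost word_count.cpp -o word_count
//     $ ./word_count some_file.txt
//     lines: <l>, words: <w>, characters: <c>
//
// When invoked without an argument, the program reads "word_count.input"
// from the current directory; read_from_file() is provided by example.hpp.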