// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following flex program:
/*
//[wcf_flex_version
    %{
        #define ID_WORD 1000
        #define ID_EOL  1001
        #define ID_CHAR 1002
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+  { return ID_WORD; }
    \n         { return ID_EOL; }
    .          { return ID_CHAR; }
    %%
    bool count(int tok)
    {
        switch (tok) {
        case ID_WORD: ++w; c += yyleng; break;
        case ID_EOL:  ++l; ++c; break;
        case ID_CHAR: ++c; break;
        default:
            return false;
        }
        return true;
    }
    int main()
    {
        int tok = EOF;
        do {
            tok = yylex();
            if (!count(tok))
                break;
        } while (EOF != tok);
        printf("%d %d %d\n", l, w, c);
        return 0;
    }
//]
*/
// Its purpose is to replicate the word count functionality of the UNIX 'wc'
// command: it prints the number of lines, words, and characters in a file.
//
// This example shows how to use the tokenize() function together with a
// simple functor, which is executed whenever a token is matched in the
// input sequence.
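//
// A typical invocation might look like this (assuming the executable is
// named word_count_functor; when no argument is given, the default input
// file "word_count.input" is read instead):
//
//     $ ./word_count_functor some_file.txt
//     lines: <l>, words: <w>, characters: <c>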

// #define BOOST_SPIRIT_LEXERTL_DEBUG

//[wcf_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/bind/bind.hpp>
#include <boost/ref.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcf_namespaces
namespace lex = boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token id definitions
///////////////////////////////////////////////////////////////////////////////
//[wcf_token_ids
enum token_ids
{
    ID_WORD = 1000,
    ID_EOL,
    ID_CHAR
};
//]

//[wcf_token_definition
/*` The template `word_count_tokens` defines three different tokens:
    `ID_WORD`, `ID_EOL`, and `ID_CHAR`, representing a word (anything except
    whitespace or a newline), a newline character, and any other character
    (`ID_WORD`, `ID_EOL`, and `ID_CHAR` are enum values representing the token
    ids, but they could be anything else convertible to an integer as well).
    The direct base class of any token definition class needs to be the
    template `lex::lexer<>`, where the corresponding template parameter (here:
    `lex::lexertl::lexer<BaseIterator>`) defines which underlying lexer engine
    is to be used to provide the required state machine functionality. In this
    example we use the Lexertl-based lexer engine as the underlying lexer type.
*/
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define tokens (the regular expression to match and the corresponding
        // token id) and add them to the lexer
        this->self.add
            ("[^ \t\n]+", ID_WORD) // words (anything except ' ', '\t' or '\n')
            ("\n", ID_EOL)         // newline characters
            (".", ID_CHAR)         // anything else is a plain character
        ;
    }
};
//]
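
// The same token definitions could also be added one at a time instead of
// using the chained syntax above (a sketch, functionally equivalent):
//
//     this->self.add("[^ \t\n]+", ID_WORD);
//     this->self.add("\n", ID_EOL);
//     this->self.add(".", ID_CHAR);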

//[wcf_functor
/*` In this example the struct `counter` is used as a functor counting the
    characters, words, and lines in the analyzed input sequence by identifying
    the matched tokens as passed from the /Spirit.Lex/ library.
*/
struct counter
{
    //<- this is an implementation detail specific to boost::bind and doesn't
    //   show up in the documentation
    typedef bool result_type;
    //->
    // the function operator gets called for each of the matched tokens
    // c, l, w are references to the counters used to keep track of the numbers
    template <typename Token>
    bool operator()(Token const& t, std::size_t& c, std::size_t& w, std::size_t& l) const
    {
        switch (t.id()) {
        case ID_WORD:       // matched a word
            // since we're using a default token type in this example, every
            // token instance contains an `iterator_range<BaseIterator>` as its
            // token attribute pointing to the matched character sequence in
            // the input
            ++w; c += t.value().size();
            break;
        case ID_EOL:        // matched a newline character
            ++l; ++c;
            break;
        case ID_CHAR:       // matched something else
            ++c;
            break;
        }
        return true;        // always continue to tokenize
    }
};
//]
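
// Note: with C++14, the boost::bind expression used in main() below could be
// replaced by a generic lambda capturing the counters by reference, for
// example (a sketch, functionally equivalent to the bind expression):
//
//     lex::tokenize(first, last, word_count_functor,
//         [&](auto const& t) { return counter()(t, c, w, l); });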

///////////////////////////////////////////////////////////////////////////////
//[wcf_main
/*` The main function simply loads the given file into memory (as a
    `std::string`), instantiates an instance of the token definition template
    using the correct lexer type (`word_count_tokens<lex::lexertl::lexer<> >`;
    the lexer's default base iterator type is `char const*`), and finally
    calls `lex::tokenize`, passing an instance of the counter function object.
    The return value of `lex::tokenize()` will be `true` if the whole input
    sequence has been successfully tokenized, and `false` otherwise.
*/
int main(int argc, char* argv[])
{
    // these variables are used to count characters, words and lines
    std::size_t c = 0, w = 0, l = 0;

    // read input from the given file
    std::string str(read_from_file(1 == argc ? "word_count.input" : argv[1]));

    // create the token definition instance needed to invoke the lexical analyzer
    word_count_tokens<lex::lexertl::lexer<> > word_count_functor;

    // tokenize the given string, the bound functor gets invoked for each of
    // the matched tokens
    using boost::placeholders::_1;
    char const* first = str.c_str();
    char const* last = &first[str.size()];
    bool r = lex::tokenize(first, last, word_count_functor,
        boost::bind(counter(), _1, boost::ref(c), boost::ref(w), boost::ref(l)));

    // print results
    if (r) {
        std::cout << "lines: " << l << ", words: " << w
                  << ", characters: " << c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
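
// Spirit.Lex is header-only, so no Boost libraries need to be linked.
// Assuming the Boost headers are on the include path and this file is named
// word_count_functor.cpp (an assumption), a possible build command is:
//
//     g++ -O2 -o word_count_functor word_count_functor.cpp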