token_iterpair.cpp source code [boost/libs/spirit/test/lex/token_iterpair.cpp]

1	// Copyright (c) 2001-2011 Hartmut Kaiser
2	//
3	// Distributed under the Boost Software License, Version 1.0. (See accompanying
4	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6	#include <boost/spirit/include/lex_lexertl.hpp>
7	#include <boost/spirit/include/lex_lexertl_position_token.hpp>
8
9	#include <boost/core/lightweight_test.hpp>
10	#include <boost/phoenix/object.hpp>
11	#include <boost/phoenix/operator.hpp>
12	#include <boost/phoenix/stl/container.hpp>
13
14	namespace lex = boost::spirit::lex;
15	namespace phoenix = boost::phoenix;
16	namespace mpl = boost::mpl;
17
18	///////////////////////////////////////////////////////////////////////////////
// Token ids assigned to the lexer's token definitions.
// ID_INT is fixed at 1000; ID_DOUBLE takes the next consecutive value (1001).
enum tokenids
{
    ID_INT = 1000,
    ID_DOUBLE
};
24
25	template <typename Lexer>
26	struct token_definitions : lex::lexer<Lexer>
27	{
28	token_definitions()
29	{
30	this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
31	this->self.add_pattern("OCTALDIGIT", "[0-7]");
32	this->self.add_pattern("DIGIT", "[0-9]");
33
34	this->self.add_pattern("OPTSIGN", "[-+]?");
35	this->self.add_pattern("EXPSTART", "[eE][-+]");
36	this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
37
38	// define tokens and associate them with the lexer
39	int_ = "(0x\|0X){HEXDIGIT}+\|0{OCTALDIGIT}\|{OPTSIGN}[1-9]{DIGIT}";
40	int_.id(id: ID_INT);
41
42	double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+\|{DIGIT}+\\.){EXPONENT}?\|{DIGIT}+{EXPONENT}";
43	double_.id(id: ID_DOUBLE);
44
45	whitespace = "[ \t\n]+";
46
47	this->self =
48	double_
49	\| int_
50	\| whitespace [ lex::_pass = lex::pass_flags::pass_ignore ]
51	;
52	}
53
54	lex::token_def<lex::omit> int_;
55	lex::token_def<lex::omit> double_;
56	lex::token_def<lex::omit> whitespace;
57	};
58
59	template <typename Lexer>
60	struct token_definitions_with_state : lex::lexer<Lexer>
61	{
62	token_definitions_with_state()
63	{
64	this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
65	this->self.add_pattern("OCTALDIGIT", "[0-7]");
66	this->self.add_pattern("DIGIT", "[0-9]");
67
68	this->self.add_pattern("OPTSIGN", "[-+]?");
69	this->self.add_pattern("EXPSTART", "[eE][-+]");
70	this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
71
72	this->self.add_state();
73	this->self.add_state("INT");
74	this->self.add_state("DOUBLE");
75
76	// define tokens and associate them with the lexer
77	int_ = "(0x\|0X){HEXDIGIT}+\|0{OCTALDIGIT}\|{OPTSIGN}[1-9]{DIGIT}";
78	int_.id(id: ID_INT);
79
80	double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+\|{DIGIT}+\\.){EXPONENT}?\|{DIGIT}+{EXPONENT}";
81	double_.id(id: ID_DOUBLE);
82
83	whitespace = "[ \t\n]+";
84
85	this->self("*") =
86	double_ [ lex::_state = "DOUBLE"]
87	\| int_ [ lex::_state = "INT" ]
88	\| whitespace [ lex::_pass = lex::pass_flags::pass_ignore ]
89	;
90	}
91
92	lex::token_def<lex::omit> int_;
93	lex::token_def<lex::omit> double_;
94	lex::token_def<lex::omit> whitespace;
95	};
96
97	///////////////////////////////////////////////////////////////////////////////
// Compare the ids of the collected tokens against an expected id sequence.
//
//   ids     expected token ids, terminated by a -1 sentinel
//   tokens  collected tokens; each element must provide id()
//
// Returns true only if every token id equals the expected id at the same
// index and the expected sequence ends exactly where the tokens end.
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*ids == -1)
            return false;   // reached end of expected data

        // token id must match
        if (tokens[i].id() != static_cast<std::size_t>(*ids))
            return false;
        ++ids;
    }

    // all tokens consumed: the expected data must be exhausted as well
    return *ids == -1;
}
114
// Compare the lexer states recorded in the collected tokens against an
// expected state sequence.
//
//   states  expected state ids, terminated by a std::size_t(-1) sentinel
//   tokens  collected tokens; each element must provide state()
//
// Returns true only if every token state equals the expected state at the
// same index and the expected sequence ends exactly where the tokens end.
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*states == std::size_t(-1))
            return false;   // reached end of expected data

        // token state must match
        if (tokens[i].state() != *states)
            return false;
        ++states;
    }

    // all tokens consumed: the expected data must be exhausted as well
    return *states == std::size_t(-1);
}
131
132	///////////////////////////////////////////////////////////////////////////////
// Expected extent of one token, expressed as offsets from the start of the
// input: 'begin' is the offset of the first matched character, 'end' the
// offset one past the last matched character.
struct position_type
{
    std::size_t begin, end;
};

// Compare the matched ranges of the collected tokens against an expected
// sequence of positions.
//
//   begin      iterator to the start of the tokenized input
//   positions  expected positions, terminated by an entry whose begin and
//              end are both std::size_t(-1)
//   tokens     collected tokens; each element must provide matched(),
//              returning a range whose begin()/end() are of type Iterator
//
// Returns true only if every token's matched range has the expected offsets
// and the expected sequence ends exactly where the tokens end.
template <typename Iterator, typename Token>
inline bool
test_token_positions(Iterator begin, position_type const* positions,
    std::vector<Token> const& tokens)
{
    std::size_t const sentinel = std::size_t(-1);

    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (positions->begin == sentinel && positions->end == sentinel)
            return false;   // reached end of expected data

        // offsets of the matched range relative to the start of the input;
        // the range is read directly so no range type has to be named here
        std::size_t start = static_cast<std::size_t>(
            std::distance(begin, tokens[i].matched().begin()));
        std::size_t end = static_cast<std::size_t>(
            std::distance(begin, tokens[i].matched().end()));

        // position must match
        if (start != positions->begin || end != positions->end)
            return false;

        ++positions;
    }

    // all tokens consumed: the expected data must be exhausted as well
    return positions->begin == sentinel && positions->end == sentinel;
}
165
166	///////////////////////////////////////////////////////////////////////////////
167	int main()
168	{
169	typedef std::string::iterator base_iterator_type;
170	std::string input(" 01 1.2 -2 0x3 2.3e6 -3.4");
171	int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -`1` };
172	std::size_t states[] = { `0`, `1`, `2`, `1`, `1`, `2`, std::size_t(-`1`) };
173	position_type positions[] =
174	{
175	{ .begin: `1`, .end: `3` }, { .begin: `4`, .end: `7` }, { .begin: `8`, .end: `10` }, { .begin: `11`, .end: `14` }, { .begin: `15`, .end: `20` }, { .begin: `21`, .end: `25` },
176	{ .begin: std::size_t(-`1`), .end: std::size_t(-`1`) }
177	};
178
179	// token type: token id, iterator_pair as token value, no state
180	{
181	typedef lex::lexertl::token<
182	base_iterator_type, mpl::vector<>, mpl::false_> token_type;
183	typedef lex::lexertl::actor_lexer<token_type> lexer_type;
184
185	token_definitions<lexer_type> lexer;
186	std::vector<token_type> tokens;
187	base_iterator_type first = input.begin();
188
189	using phoenix::arg_names::_1;
190	BOOST_TEST(lex::tokenize(first, input.end(), lexer
191	, phoenix::push_back(phoenix::ref(tokens), _1)));
192
193	BOOST_TEST(test_token_ids(ids, tokens));
194	}
195
196	{
197	typedef lex::lexertl::position_token<
198	base_iterator_type, mpl::vector<>, mpl::false_> token_type;
199	typedef lex::lexertl::actor_lexer<token_type> lexer_type;
200
201	token_definitions<lexer_type> lexer;
202	std::vector<token_type> tokens;
203	base_iterator_type first = input.begin();
204
205	using phoenix::arg_names::_1;
206	BOOST_TEST(lex::tokenize(first, input.end(), lexer
207	, phoenix::push_back(phoenix::ref(tokens), _1)));
208
209	BOOST_TEST(test_token_ids(ids, tokens));
210	BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
211	}
212
213	// token type: holds token id, state, iterator_pair as token value
214	{
215	typedef lex::lexertl::token<
216	base_iterator_type, mpl::vector<>, mpl::true_> token_type;
217	typedef lex::lexertl::actor_lexer<token_type> lexer_type;
218
219	token_definitions_with_state<lexer_type> lexer;
220	std::vector<token_type> tokens;
221	base_iterator_type first = input.begin();
222
223	using phoenix::arg_names::_1;
224	BOOST_TEST(lex::tokenize(first, input.end(), lexer
225	, phoenix::push_back(phoenix::ref(tokens), _1)));
226
227	BOOST_TEST(test_token_ids(ids, tokens));
228	BOOST_TEST(test_token_states(states, tokens));
229	}
230
231	{
232	typedef lex::lexertl::position_token<
233	base_iterator_type, mpl::vector<>, mpl::true_> token_type;
234	typedef lex::lexertl::actor_lexer<token_type> lexer_type;
235
236	token_definitions_with_state<lexer_type> lexer;
237	std::vector<token_type> tokens;
238	base_iterator_type first = input.begin();
239
240	using phoenix::arg_names::_1;
241	BOOST_TEST(lex::tokenize(first, input.end(), lexer
242	, phoenix::push_back(phoenix::ref(tokens), _1)));
243
244	BOOST_TEST(test_token_ids(ids, tokens));
245	BOOST_TEST(test_token_states(states, tokens));
246	BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
247	}
248
249	return boost::report_errors();
250	}
251

source code of boost/libs/spirit/test/lex/token_iterpair.cpp