// set_token_value.cpp -- source file: boost/libs/spirit/test/lex/set_token_value.cpp

1	// Copyright (c) 2001-2011 Hartmut Kaiser
2	//
3	// Distributed under the Boost Software License, Version 1.0. (See accompanying
4	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6	#include <boost/spirit/include/lex_lexertl.hpp>
7
8	#include <boost/phoenix/object.hpp>
9	#include <boost/phoenix/operator.hpp>
10	#include <boost/phoenix/stl/container.hpp>
11
12	#include <boost/core/lightweight_test.hpp>
13
14	using namespace boost::spirit;
15
16	///////////////////////////////////////////////////////////////////////////////
17	// semantic action analyzing leading whitespace
// Token ids that handle_whitespace assigns when it reclassifies a matched
// run of leading whitespace as a change in indentation.
enum tokenids
{
    ID_INDENT = 1000,   // leading whitespace got wider than the current level
    ID_DEDENT           // leading whitespace got narrower (value: ID_INDENT + 1)
};
23
24	#ifdef _MSC_VER
25	# pragma warning(push)
26	# pragma warning(disable: 4512) // assignment operator could not be generated.
27	#endif
28	struct handle_whitespace
29	{
30	handle_whitespace(std::stack<unsigned int>& indents)
31	: indents_(indents) {}
32
33	template <typename Iterator, typename IdType, typename Context>
34	void operator()(Iterator& start, Iterator& end
35	, BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
36	, Context& ctx)
37	{
38	unsigned int level = `0`;
39	if (is_indent(start, end, level)) {
40	id = ID_INDENT;
41	ctx.set_value(level);
42	}
43	else if (is_dedent(start, end, level)) {
44	id = ID_DEDENT;
45	ctx.set_value(level);
46	}
47	else {
48	pass = lex::pass_flags::pass_ignore;
49	}
50	}
51
52	// Get indentation level, for now (no tabs) we just count the spaces
53	// once we allow tabs in the regex this needs to be expanded
54	template <typename Iterator>
55	unsigned int get_indent(Iterator& start, Iterator& end)
56	{
57	return static_cast<unsigned int>(std::distance(start, end));
58	}
59
60	template <typename Iterator>
61	bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
62	{
63	unsigned int newindent = get_indent(start, end);
64	while (!indents_.empty() && newindent < indents_.top()) {
65	level++; // dedent one more level
66	indents_.pop();
67	}
68	return level > `0`;
69	}
70
71	// Handle additional indentation
72	template <typename Iterator>
73	bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
74	{
75	unsigned int newindent = get_indent(start, end);
76	if (indents_.empty() \|\| newindent > indents_.top()) {
77	level = `1`; // indent one more level
78	indents_.push(x: newindent);
79	return true;
80	}
81	return false;
82	}
83
84	std::stack<unsigned int>& indents_;
85	};
86	#ifdef _MSC_VER
87	# pragma warning(pop)
88	#endif
89
90	///////////////////////////////////////////////////////////////////////////////
91	// Token definition
92	template <typename Lexer>
93	struct set_token_value : boost::spirit::lex::lexer<Lexer>
94	{
95	set_token_value()
96	{
97	using lex::_pass;
98
99	// define tokens and associate them with the lexer
100	whitespace = "^[ ]+";
101	newline = `'\n'`;
102
103	this->self = whitespace [ handle_whitespace(indents) ];
104	this->self += newline [ _pass = lex::pass_flags::pass_ignore ];
105	}
106
107	lex::token_def<unsigned int> whitespace;
108	lex::token_def<> newline;
109	std::stack<unsigned int> indents;
110	};
111
112	///////////////////////////////////////////////////////////////////////////////
// One expected token: its id and its unsigned integer value.
// Arrays of token_data passed to test_tokens are terminated by an entry
// whose id is -1.
struct token_data
{
    int id;                 // expected token id, or -1 for the terminator
    unsigned int value;     // expected token value
};
118
119	template <typename Token>
120	inline
121	bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
122	{
123	for (std::size_t i = `0`, len = tokens.size(); i < len; ++i)
124	{
125	if (d->id == -`1`)
126	return false; // reached end of expected data
127
128	typename Token::token_value_type const& value (tokens[i].value());
129	if (tokens[i].id() != static_cast<std::size_t>(d->id)) // token id must match
130	return false;
131	if (value.which() != `1`) // must have an integer value
132	return false;
133	if (boost::get<unsigned int>(value) != d->value) // value must match
134	return false;
135	++d;
136	}
137
138	return (d->id == -`1`) ? true : false;
139	}
140
// Compares the indentation stack against an expected sequence.
//
//   i       : expected widths listed from the top of the stack downwards,
//             terminated by -1
//   indents : stack to check; it is consumed (popped) while comparing
//
// Returns true only if every stacked width matches its expected entry and the
// expected sequence ends exactly when the stack becomes empty.
inline
bool test_indents(int* i, std::stack<unsigned int>& indents)
{
    while (!indents.empty())
    {
        if (*i == -1)
            return false;   // reached end of expected data
        if (indents.top() != static_cast<unsigned int>(*i))
            return false;   // value must match

        ++i;
        indents.pop();
    }

    // the stack is exhausted; the expected data must be exhausted as well
    return *i == -1;
}
157
158	///////////////////////////////////////////////////////////////////////////////
159	int main()
160	{
161	namespace lex = boost::spirit::lex;
162	namespace phoenix = boost::phoenix;
163
164	typedef std::string::iterator base_iterator_type;
165	typedef boost::mpl::vector<unsigned int> token_value_types;
166	typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
167	typedef lex::lexertl::actor_lexer<token_type> lexer_type;
168
169	// test simple indent
170	{
171	set_token_value<lexer_type> lexer;
172	std::vector<token_type> tokens;
173	std::string input(" ");
174	base_iterator_type first = input.begin();
175
176	using phoenix::arg_names::_1;
177	BOOST_TEST(lex::tokenize(first, input.end(), lexer
178	, phoenix::push_back(phoenix::ref(tokens), _1)));
179
180	int i[] = { `4`, -`1` };
181	BOOST_TEST(test_indents(i, lexer.indents));
182
183	token_data d[] = { { .id: ID_INDENT, .value: `1` }, { .id: -`1`, .value: `0` } };
184	BOOST_TEST(test_tokens(d, tokens));
185	}
186
187	// test two indents
188	{
189	set_token_value<lexer_type> lexer;
190	std::vector<token_type> tokens;
191	std::string input(
192	" \n"
193	" \n");
194	base_iterator_type first = input.begin();
195
196	using phoenix::arg_names::_1;
197	BOOST_TEST(lex::tokenize(first, input.end(), lexer
198	, phoenix::push_back(phoenix::ref(tokens), _1)));
199
200	int i[] = { `8`, `4`, -`1` };
201	BOOST_TEST(test_indents(i, lexer.indents));
202
203	token_data d[] = {
204	{ .id: ID_INDENT, .value: `1` }, { .id: ID_INDENT, .value: `1` }
205	, { .id: -`1`, .value: `0` } };
206	BOOST_TEST(test_tokens(d, tokens));
207	}
208
209	// test one dedent
210	{
211	set_token_value<lexer_type> lexer;
212	std::vector<token_type> tokens;
213	std::string input(
214	" \n"
215	" \n"
216	" \n");
217	base_iterator_type first = input.begin();
218
219	using phoenix::arg_names::_1;
220	BOOST_TEST(lex::tokenize(first, input.end(), lexer
221	, phoenix::push_back(phoenix::ref(tokens), _1)));
222
223	int i[] = { `4`, -`1` };
224	BOOST_TEST(test_indents(i, lexer.indents));
225
226	token_data d[] = {
227	{ .id: ID_INDENT, .value: `1` }, { .id: ID_INDENT, .value: `1` }
228	, { .id: ID_DEDENT, .value: `1` }
229	, { .id: -`1`, .value: `0` } };
230	BOOST_TEST(test_tokens(d, tokens));
231	}
232
233	// test two dedents
234	{
235	set_token_value<lexer_type> lexer;
236	std::vector<token_type> tokens;
237	std::string input(
238	" \n"
239	" \n"
240	" \n"
241	" \n");
242	base_iterator_type first = input.begin();
243
244	using phoenix::arg_names::_1;
245	BOOST_TEST(lex::tokenize(first, input.end(), lexer
246	, phoenix::push_back(phoenix::ref(tokens), _1)));
247
248	int i[] = { `4`, -`1` };
249	BOOST_TEST(test_indents(i, lexer.indents));
250
251	token_data d[] = {
252	{ .id: ID_INDENT, .value: `1` }, { .id: ID_INDENT, .value: `1` }, { .id: ID_INDENT, .value: `1` }
253	, { .id: ID_DEDENT, .value: `2` }
254	, { .id: -`1`, .value: `0` } };
255	BOOST_TEST(test_tokens(d, tokens));
256	}
257
258	return boost::report_errors();
259	}
260
261

// End of boost/libs/spirit/test/lex/set_token_value.cpp