lexer.hpp source code [boost/libs/spirit/include/boost/spirit/home/lex/lexer/lexer.hpp]

1	// Copyright (c) 2001-2011 Hartmut Kaiser
2	//
3	// Distributed under the Boost Software License, Version 1.0. (See accompanying
4	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6	#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
7	#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
8
9	#if defined(_MSC_VER)
10	#pragma once
11	#endif
12
13	#include <boost/spirit/home/support/info.hpp>
14	#include <boost/spirit/home/qi/skip_over.hpp>
15	#include <boost/spirit/home/qi/parser.hpp>
16	#include <boost/spirit/home/qi/detail/assign_to.hpp>
17	#include <boost/spirit/home/lex/reference.hpp>
18	#include <boost/spirit/home/lex/meta_compiler.hpp>
19	#include <boost/spirit/home/lex/lexer_type.hpp>
20	#include <boost/spirit/home/lex/lexer/token_def.hpp>
21	#include <boost/assert.hpp>
22	#include <boost/noncopyable.hpp>
23	#include <boost/fusion/include/vector.hpp>
24	#include <boost/mpl/assert.hpp>
25	#include <boost/proto/extends.hpp>
26	#include <boost/proto/traits.hpp>
27	#include <boost/range/iterator_range_core.hpp>
28	#include <iterator> // for std::iterator_traits
29	#include <string>
30
31	namespace boost { namespace spirit { namespace lex
32	{
33	///////////////////////////////////////////////////////////////////////////
34	namespace detail
35	{
36	///////////////////////////////////////////////////////////////////////
37	#ifdef _MSC_VER
38	# pragma warning(push)
39	# pragma warning(disable: 4512) // assignment operator could not be generated.
40	#endif
41	template <typename LexerDef>
42	struct lexer_def_
43	: proto::extends<
44	typename proto::terminal<
45	lex::reference<lexer_def_<LexerDef> const>
46	>::type
47	, lexer_def_<LexerDef> >
48	, qi::parser<lexer_def_<LexerDef> >
49	, lex::lexer_type<lexer_def_<LexerDef> >
50	{
51	private:
52	// avoid warnings about using 'this' in constructor
53	lexer_def_& this_() { return *this; }
54
55	typedef typename LexerDef::char_type char_type;
56	typedef typename LexerDef::string_type string_type;
57	typedef typename LexerDef::id_type id_type;
58
59	typedef lex::reference<lexer_def_ const> reference_;
60	typedef typename proto::terminal<reference_>::type terminal_type;
61	typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
62
63	reference_ alias() const
64	{
65	return reference_(*this);
66	}
67
68	public:
69	// Qi interface: metafunction calculating parser attribute type
70	template <typename Context, typename Iterator>
71	struct attribute
72	{
73	// the return value of a token set contains the matched token
74	// id, and the corresponding pair of iterators
75	typedef typename Iterator::base_iterator_type iterator_type;
76	typedef
77	fusion::vector2<id_type, iterator_range<iterator_type> >
78	type;
79	};
80
81	// Qi interface: parse functionality
82	template <typename Iterator, typename Context
83	, typename Skipper, typename Attribute>
84	bool parse(Iterator& first, Iterator const& last
85	, Context& /context/, Skipper const& skipper
86	, Attribute& attr) const
87	{
88	qi::skip_over(first, last, skipper); // always do a pre-skip
89
90	if (first != last) {
91	typedef typename
92	std::iterator_traits<Iterator>::value_type
93	token_type;
94
95	token_type const& t = *first;
96	if (token_is_valid(t) && t.state() == first.get_state()) {
97	// any of the token definitions matched
98	spirit::traits::assign_to(t, attr);
99	++first;
100	return true;
101	}
102	}
103	return false;
104	}
105
106	// Qi interface: 'what' functionality
107	template <typename Context>
108	info what(Context& /context/) const
109	{
110	return info("lexer");
111	}
112
113	private:
114	// allow to use the lexer.self.add("regex1", id1)("regex2", id2);
115	// syntax
116	struct adder
117	{
118	adder(lexer_def_& def_)
119	: def(def_) {}
120
121	// Add a token definition based on a single character as given
122	// by the first parameter, the second parameter allows to
123	// specify the token id to use for the new token. If no token
124	// id is given the character code is used.
125	adder const& operator()(char_type c
126	, id_type token_id = id_type()) const
127	{
128	if (id_type() == token_id)
129	token_id = static_cast<id_type>(c);
130	def.def.add_token (def.state.c_str(), c, token_id
131	, def.targetstate.empty() ? `0` : def.targetstate.c_str());
132	return *this;
133	}
134
135	// Add a token definition based on a character sequence as
136	// given by the first parameter, the second parameter allows to
137	// specify the token id to use for the new token. If no token
138	// id is given this function will generate a unique id to be
139	// used as the token's id.
140	adder const& operator()(string_type const& s
141	, id_type token_id = id_type()) const
142	{
143	if (id_type() == token_id)
144	token_id = def.def.get_next_id();
145	def.def.add_token (def.state.c_str(), s, token_id
146	, def.targetstate.empty() ? `0` : def.targetstate.c_str());
147	return *this;
148	}
149
150	template <typename Attribute>
151	adder const& operator()(
152	token_def<Attribute, char_type, id_type>& tokdef
153	, id_type token_id = id_type()) const
154	{
155	// make sure we have a token id
156	if (id_type() == token_id) {
157	if (id_type() == tokdef.id()) {
158	token_id = def.def.get_next_id();
159	tokdef.id(token_id);
160	}
161	else {
162	token_id = tokdef.id();
163	}
164	}
165	else {
166	// the following assertion makes sure that the token_def
167	// instance has not been assigned a different id earlier
168	BOOST_ASSERT(id_type() == tokdef.id()
169	\|\| token_id == tokdef.id());
170	tokdef.id(token_id);
171	}
172
173	def.define(tokdef);
174	return *this;
175	}
176
177	// template <typename F>
178	// adder const& operator()(char_type c, id_type token_id, F act) const
179	// {
180	// if (id_type() == token_id)
181	// token_id = def.def.get_next_id();
182	// std::size_t unique_id =
183	// def.def.add_token (def.state.c_str(), s, token_id);
184	// def.def.add_action(unique_id, def.state.c_str(), act);
185	// return this;*
186	// }
187
188	lexer_def_& def;
189	};
190	friend struct adder;
191
192	// allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
193	// syntax
194	struct pattern_adder
195	{
196	pattern_adder(lexer_def_& def_)
197	: def(def_) {}
198
199	pattern_adder const& operator()(string_type const& p
200	, string_type const& s) const
201	{
202	def.def.add_pattern (def.state.c_str(), p, s);
203	return *this;
204	}
205
206	lexer_def_& def;
207	};
208	friend struct pattern_adder;
209
210	private:
211	// Helper function to invoke the necessary 2 step compilation
212	// process on token definition expressions
213	template <typename TokenExpr>
214	void compile2pass(TokenExpr const& expr)
215	{
216	expr.collect(def, state, targetstate);
217	expr.add_actions(def);
218	}
219
220	public:
221	///////////////////////////////////////////////////////////////////
222	template <typename Expr>
223	void define(Expr const& expr)
224	{
225	compile2pass(compile<lex::domain>(expr));
226	}
227
228	lexer_def_(LexerDef& def_, string_type const& state_
229	, string_type const& targetstate_ = string_type())
230	: proto_base_type(terminal_type::make(alias()))
231	, add(this_()), add_pattern(this_()), def(def_)
232	, state(state_), targetstate(targetstate_)
233	{}
234
235	// allow to switch states
236	lexer_def_ operator()(char_type const* state_) const
237	{
238	return lexer_def_(def, state_);
239	}
240	lexer_def_ operator()(char_type const* state_
241	, char_type const* targetstate_) const
242	{
243	return lexer_def_(def, state_, targetstate_);
244	}
245	lexer_def_ operator()(string_type const& state_
246	, string_type const& targetstate_ = string_type()) const
247	{
248	return lexer_def_(def, state_, targetstate_);
249	}
250
251	// allow to assign a token definition expression
252	template <typename Expr>
253	lexer_def_& operator= (Expr const& xpr)
254	{
255	// Report invalid expression error as early as possible.
256	// If you got an error_invalid_expression error message here,
257	// then the expression (expr) is not a valid spirit lex
258	// expression.
259	BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
260
261	def.clear(state.c_str());
262	define(xpr);
263	return *this;
264	}
265
266	// explicitly tell the lexer that the given state will be defined
267	// (useful in conjunction with "")*
268	std::size_t add_state(char_type const* state_ = `0`)
269	{
270	return def.add_state(state_ ? state_ : def.initial_state().c_str());
271	}
272
273	adder add;
274	pattern_adder add_pattern;
275
276	private:
277	LexerDef& def;
278	string_type state;
279	string_type targetstate;
280	};
281	#ifdef _MSC_VER
282	# pragma warning(pop)
283	#endif
284
285	#if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
286	// allow to assign a token definition expression
287	template <typename LexerDef, typename Expr>
288	inline lexer_def_<LexerDef>&
289	operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
290	{
291	// Report invalid expression error as early as possible.
292	// If you got an error_invalid_expression error message here,
293	// then the expression (expr) is not a valid spirit lex
294	// expression.
295	BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
296
297	lexdef.define(xpr);
298	return lexdef;
299	}
300	#else
301	// allow to assign a token definition expression
302	template <typename LexerDef, typename Expr>
303	inline lexer_def_<LexerDef>&
304	operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
305	{
306	// Report invalid expression error as early as possible.
307	// If you got an error_invalid_expression error message here,
308	// then the expression (expr) is not a valid spirit lex
309	// expression.
310	BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
311
312	lexdef.define(xpr);
313	return lexdef;
314	}
315	#endif
316
317	template <typename LexerDef, typename Expr>
318	inline lexer_def_<LexerDef>&
319	operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
320	{
321	// Report invalid expression error as early as possible.
322	// If you got an error_invalid_expression error message here,
323	// then the expression (expr) is not a valid spirit lex
324	// expression.
325	BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
326
327	lexdef.define(xpr);
328	return lexdef;
329	}
330	}
331
332	///////////////////////////////////////////////////////////////////////////
// The match_flags flags are used to influence the different matching
// modes of the lexer. The enumeration is nested in a struct so that the
// enumerators have to be qualified (match_flags::match_icase, ...).
struct match_flags
{
    enum enum_type
    {
        match_default = 0,          // no flags
        match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
        match_icase = 2             // all matching operations are case insensitive
    };
};
344
345	///////////////////////////////////////////////////////////////////////////
346	// This represents a lexer object
347	///////////////////////////////////////////////////////////////////////////
348
349	///////////////////////////////////////////////////////////////////////////
// This is the first token id automatically assigned by the library
// if needed
enum tokenids
{
    min_token_id = 0x10000
};
356
357	template <typename Lexer>
358	class lexer : public Lexer
359	{
360	private:
361	// avoid warnings about using 'this' in constructor
362	lexer& this_() { return *this; }
363
364	std::size_t next_token_id; // has to be an integral type
365
366	public:
367	typedef Lexer lexer_type;
368	typedef typename Lexer::id_type id_type;
369	typedef typename Lexer::char_type char_type;
370	typedef typename Lexer::iterator_type iterator_type;
371	typedef lexer base_type;
372
373	typedef detail::lexer_def_<lexer> lexer_def;
374	typedef std::basic_string<char_type> string_type;
375
376	// if `id_type` was specified but `first_id` is not provided
377	// the `min_token_id` value may be out of range for `id_type`,
378	// but it will be a problem only if unique ids feature is in use.
379	lexer(unsigned int flags = match_flags::match_default)
380	: lexer_type(flags)
381	, next_token_id(min_token_id)
382	, self(this_(), lexer_type::initial_state())
383	{}
384
385	lexer(unsigned int flags, id_type first_id)
386	: lexer_type(flags)
387	, next_token_id(first_id)
388	, self(this_(), lexer_type::initial_state())
389	{}
390
391	// access iterator interface
392	template <typename Iterator>
393	iterator_type begin(Iterator& first, Iterator const& last
394	, char_type const* initial_state = `0`) const
395	{ return this->lexer_type::begin(first, last, initial_state); }
396	iterator_type end() const
397	{ return this->lexer_type::end(); }
398
399	std::size_t map_state(char_type const* state)
400	{ return this->lexer_type::add_state(state); }
401
402	// create a unique token id
403	id_type get_next_id() { return id_type(next_token_id++); }
404
405	lexer_def self; // allow for easy token definition
406	};
407
408	}}}
409
410	#endif
411

source code of boost/libs/spirit/include/boost/spirit/home/lex/lexer/lexer.hpp