1// Copyright (c) 2001-2011 Hartmut Kaiser
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
7#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
8
9#if defined(_MSC_VER)
10#pragma once
11#endif
12
13#include <boost/spirit/home/support/info.hpp>
14#include <boost/spirit/home/qi/skip_over.hpp>
15#include <boost/spirit/home/qi/parser.hpp>
16#include <boost/spirit/home/qi/detail/assign_to.hpp>
17#include <boost/spirit/home/lex/reference.hpp>
18#include <boost/spirit/home/lex/meta_compiler.hpp>
19#include <boost/spirit/home/lex/lexer_type.hpp>
20#include <boost/spirit/home/lex/lexer/token_def.hpp>
21#include <boost/assert.hpp>
22#include <boost/noncopyable.hpp>
23#include <boost/fusion/include/vector.hpp>
24#include <boost/mpl/assert.hpp>
25#include <boost/proto/extends.hpp>
26#include <boost/proto/traits.hpp>
27#include <boost/range/iterator_range_core.hpp>
28#include <iterator> // for std::iterator_traits
29#include <string>
30
31namespace boost { namespace spirit { namespace lex
32{
33 ///////////////////////////////////////////////////////////////////////////
34 namespace detail
35 {
36 ///////////////////////////////////////////////////////////////////////
37#ifdef _MSC_VER
38# pragma warning(push)
39# pragma warning(disable: 4512) // assignment operator could not be generated.
40#endif
41 template <typename LexerDef>
42 struct lexer_def_
43 : proto::extends<
44 typename proto::terminal<
45 lex::reference<lexer_def_<LexerDef> const>
46 >::type
47 , lexer_def_<LexerDef> >
48 , qi::parser<lexer_def_<LexerDef> >
49 , lex::lexer_type<lexer_def_<LexerDef> >
50 {
51 private:
52 // avoid warnings about using 'this' in constructor
53 lexer_def_& this_() { return *this; }
54
55 typedef typename LexerDef::char_type char_type;
56 typedef typename LexerDef::string_type string_type;
57 typedef typename LexerDef::id_type id_type;
58
59 typedef lex::reference<lexer_def_ const> reference_;
60 typedef typename proto::terminal<reference_>::type terminal_type;
61 typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
62
63 reference_ alias() const
64 {
65 return reference_(*this);
66 }
67
68 public:
69 // Qi interface: metafunction calculating parser attribute type
70 template <typename Context, typename Iterator>
71 struct attribute
72 {
73 // the return value of a token set contains the matched token
74 // id, and the corresponding pair of iterators
75 typedef typename Iterator::base_iterator_type iterator_type;
76 typedef
77 fusion::vector2<id_type, iterator_range<iterator_type> >
78 type;
79 };
80
81 // Qi interface: parse functionality
82 template <typename Iterator, typename Context
83 , typename Skipper, typename Attribute>
84 bool parse(Iterator& first, Iterator const& last
85 , Context& /*context*/, Skipper const& skipper
86 , Attribute& attr) const
87 {
88 qi::skip_over(first, last, skipper); // always do a pre-skip
89
90 if (first != last) {
91 typedef typename
92 std::iterator_traits<Iterator>::value_type
93 token_type;
94
95 token_type const& t = *first;
96 if (token_is_valid(t) && t.state() == first.get_state()) {
97 // any of the token definitions matched
98 spirit::traits::assign_to(t, attr);
99 ++first;
100 return true;
101 }
102 }
103 return false;
104 }
105
106 // Qi interface: 'what' functionality
107 template <typename Context>
108 info what(Context& /*context*/) const
109 {
110 return info("lexer");
111 }
112
113 private:
114 // allow to use the lexer.self.add("regex1", id1)("regex2", id2);
115 // syntax
116 struct adder
117 {
118 adder(lexer_def_& def_)
119 : def(def_) {}
120
121 // Add a token definition based on a single character as given
122 // by the first parameter, the second parameter allows to
123 // specify the token id to use for the new token. If no token
124 // id is given the character code is used.
125 adder const& operator()(char_type c
126 , id_type token_id = id_type()) const
127 {
128 if (id_type() == token_id)
129 token_id = static_cast<id_type>(c);
130 def.def.add_token (def.state.c_str(), c, token_id
131 , def.targetstate.empty() ? 0 : def.targetstate.c_str());
132 return *this;
133 }
134
135 // Add a token definition based on a character sequence as
136 // given by the first parameter, the second parameter allows to
137 // specify the token id to use for the new token. If no token
138 // id is given this function will generate a unique id to be
139 // used as the token's id.
140 adder const& operator()(string_type const& s
141 , id_type token_id = id_type()) const
142 {
143 if (id_type() == token_id)
144 token_id = def.def.get_next_id();
145 def.def.add_token (def.state.c_str(), s, token_id
146 , def.targetstate.empty() ? 0 : def.targetstate.c_str());
147 return *this;
148 }
149
150 template <typename Attribute>
151 adder const& operator()(
152 token_def<Attribute, char_type, id_type>& tokdef
153 , id_type token_id = id_type()) const
154 {
155 // make sure we have a token id
156 if (id_type() == token_id) {
157 if (id_type() == tokdef.id()) {
158 token_id = def.def.get_next_id();
159 tokdef.id(token_id);
160 }
161 else {
162 token_id = tokdef.id();
163 }
164 }
165 else {
166 // the following assertion makes sure that the token_def
167 // instance has not been assigned a different id earlier
168 BOOST_ASSERT(id_type() == tokdef.id()
169 || token_id == tokdef.id());
170 tokdef.id(token_id);
171 }
172
173 def.define(tokdef);
174 return *this;
175 }
176
177// template <typename F>
178// adder const& operator()(char_type c, id_type token_id, F act) const
179// {
180// if (id_type() == token_id)
181// token_id = def.def.get_next_id();
182// std::size_t unique_id =
183// def.def.add_token (def.state.c_str(), s, token_id);
184// def.def.add_action(unique_id, def.state.c_str(), act);
185// return *this;
186// }
187
188 lexer_def_& def;
189 };
190 friend struct adder;
191
192 // allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
193 // syntax
194 struct pattern_adder
195 {
196 pattern_adder(lexer_def_& def_)
197 : def(def_) {}
198
199 pattern_adder const& operator()(string_type const& p
200 , string_type const& s) const
201 {
202 def.def.add_pattern (def.state.c_str(), p, s);
203 return *this;
204 }
205
206 lexer_def_& def;
207 };
208 friend struct pattern_adder;
209
210 private:
211 // Helper function to invoke the necessary 2 step compilation
212 // process on token definition expressions
213 template <typename TokenExpr>
214 void compile2pass(TokenExpr const& expr)
215 {
216 expr.collect(def, state, targetstate);
217 expr.add_actions(def);
218 }
219
220 public:
221 ///////////////////////////////////////////////////////////////////
222 template <typename Expr>
223 void define(Expr const& expr)
224 {
225 compile2pass(compile<lex::domain>(expr));
226 }
227
228 lexer_def_(LexerDef& def_, string_type const& state_
229 , string_type const& targetstate_ = string_type())
230 : proto_base_type(terminal_type::make(alias()))
231 , add(this_()), add_pattern(this_()), def(def_)
232 , state(state_), targetstate(targetstate_)
233 {}
234
235 // allow to switch states
236 lexer_def_ operator()(char_type const* state_) const
237 {
238 return lexer_def_(def, state_);
239 }
240 lexer_def_ operator()(char_type const* state_
241 , char_type const* targetstate_) const
242 {
243 return lexer_def_(def, state_, targetstate_);
244 }
245 lexer_def_ operator()(string_type const& state_
246 , string_type const& targetstate_ = string_type()) const
247 {
248 return lexer_def_(def, state_, targetstate_);
249 }
250
251 // allow to assign a token definition expression
252 template <typename Expr>
253 lexer_def_& operator= (Expr const& xpr)
254 {
255 // Report invalid expression error as early as possible.
256 // If you got an error_invalid_expression error message here,
257 // then the expression (expr) is not a valid spirit lex
258 // expression.
259 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
260
261 def.clear(state.c_str());
262 define(xpr);
263 return *this;
264 }
265
266 // explicitly tell the lexer that the given state will be defined
267 // (useful in conjunction with "*")
268 std::size_t add_state(char_type const* state_ = 0)
269 {
270 return def.add_state(state_ ? state_ : def.initial_state().c_str());
271 }
272
273 adder add;
274 pattern_adder add_pattern;
275
276 private:
277 LexerDef& def;
278 string_type state;
279 string_type targetstate;
280 };
281#ifdef _MSC_VER
282# pragma warning(pop)
283#endif
284
285#if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
286 // allow to assign a token definition expression
287 template <typename LexerDef, typename Expr>
288 inline lexer_def_<LexerDef>&
289 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
290 {
291 // Report invalid expression error as early as possible.
292 // If you got an error_invalid_expression error message here,
293 // then the expression (expr) is not a valid spirit lex
294 // expression.
295 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
296
297 lexdef.define(xpr);
298 return lexdef;
299 }
300#else
301 // allow to assign a token definition expression
302 template <typename LexerDef, typename Expr>
303 inline lexer_def_<LexerDef>&
304 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
305 {
306 // Report invalid expression error as early as possible.
307 // If you got an error_invalid_expression error message here,
308 // then the expression (expr) is not a valid spirit lex
309 // expression.
310 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
311
312 lexdef.define(xpr);
313 return lexdef;
314 }
315#endif
316
317 template <typename LexerDef, typename Expr>
318 inline lexer_def_<LexerDef>&
319 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
320 {
321 // Report invalid expression error as early as possible.
322 // If you got an error_invalid_expression error message here,
323 // then the expression (expr) is not a valid spirit lex
324 // expression.
325 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
326
327 lexdef.define(xpr);
328 return lexdef;
329 }
330 }
331
332 ///////////////////////////////////////////////////////////////////////////
// The match_flags values select the different matching modes of the
// lexer.
struct match_flags
{
    enum enum_type
    {
        // no flags
        match_default = 0x0,
        // the regex '.' doesn't match newlines
        match_not_dot_newline = 0x1,
        // all matching operations are case insensitive
        match_icase = 0x2
    };
};
344
345 ///////////////////////////////////////////////////////////////////////////
    // This represents a lexer object (see the `lexer` class template below)
347 ///////////////////////////////////////////////////////////////////////////
348
///////////////////////////////////////////////////////////////////////////
// First token id the library hands out when it has to assign token ids
// automatically.
enum tokenids
{
    min_token_id = 1 << 16      // == 0x10000
};
356
357 template <typename Lexer>
358 class lexer : public Lexer
359 {
360 private:
361 // avoid warnings about using 'this' in constructor
362 lexer& this_() { return *this; }
363
364 std::size_t next_token_id; // has to be an integral type
365
366 public:
367 typedef Lexer lexer_type;
368 typedef typename Lexer::id_type id_type;
369 typedef typename Lexer::char_type char_type;
370 typedef typename Lexer::iterator_type iterator_type;
371 typedef lexer base_type;
372
373 typedef detail::lexer_def_<lexer> lexer_def;
374 typedef std::basic_string<char_type> string_type;
375
376 // if `id_type` was specified but `first_id` is not provided
377 // the `min_token_id` value may be out of range for `id_type`,
378 // but it will be a problem only if unique ids feature is in use.
379 lexer(unsigned int flags = match_flags::match_default)
380 : lexer_type(flags)
381 , next_token_id(min_token_id)
382 , self(this_(), lexer_type::initial_state())
383 {}
384
385 lexer(unsigned int flags, id_type first_id)
386 : lexer_type(flags)
387 , next_token_id(first_id)
388 , self(this_(), lexer_type::initial_state())
389 {}
390
391 // access iterator interface
392 template <typename Iterator>
393 iterator_type begin(Iterator& first, Iterator const& last
394 , char_type const* initial_state = 0) const
395 { return this->lexer_type::begin(first, last, initial_state); }
396 iterator_type end() const
397 { return this->lexer_type::end(); }
398
399 std::size_t map_state(char_type const* state)
400 { return this->lexer_type::add_state(state); }
401
402 // create a unique token id
403 id_type get_next_id() { return id_type(next_token_id++); }
404
405 lexer_def self; // allow for easy token definition
406 };
407
408}}}
409
410#endif
411

// source code of boost/libs/spirit/include/boost/spirit/home/lex/lexer/lexer.hpp