| 1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
| 2 | // |
| 3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| 4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 5 | |
| 6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM) |
| 7 | #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM |
| 8 | |
| 9 | #if defined(_MSC_VER) |
| 10 | #pragma once |
| 11 | #endif |
| 12 | |
| 13 | #include <boost/mpl/bool.hpp> |
| 14 | #include <boost/detail/workaround.hpp> |
| 15 | #include <boost/spirit/home/lex/lexer/pass_flags.hpp> |
| 16 | #include <boost/assert.hpp> |
| 17 | #include <iterator> // for std::iterator_traits |
| 18 | |
| 19 | #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310) |
| 20 | #define BOOST_SPIRIT_STATIC_EOF 1 |
| 21 | #define BOOST_SPIRIT_EOF_PREFIX static |
| 22 | #else |
| 23 | #define BOOST_SPIRIT_EOF_PREFIX |
| 24 | #endif |
| 25 | |
| 26 | namespace boost { namespace spirit { namespace lex { namespace lexertl |
| 27 | { |
| 28 | /////////////////////////////////////////////////////////////////////////// |
| 29 | // |
// functor is a template usable as the functor object for the
// multi_pass iterator, wrapping a lexertl based DFA into an
// iterator based interface.
| 33 | // |
| 34 | // Token: the type of the tokens produced by this functor |
| 35 | // this needs to expose a constructor with the following |
| 36 | // prototype: |
| 37 | // |
| 38 | // Token(std::size_t id, std::size_t state, |
| 39 | // Iterator start, Iterator end) |
| 40 | // |
// where 'id' is the token id, 'state' is the lexer state
// this token has been matched in, and 'start' and 'end'
// mark the start and the end of the token with respect
// to the underlying character stream.
| 45 | // FunctorData: |
| 46 | // this is expected to encapsulate the shared part of the |
| 47 | // functor (see lex/lexer/lexertl/functor_data.hpp for an |
| 48 | // example and documentation). |
| 49 | // Iterator: the type of the underlying iterator |
| 50 | // SupportsActors: |
| 51 | // this is expected to be a mpl::bool_, if mpl::true_ the |
| 52 | // functor invokes functors which (optionally) have |
| 53 | // been attached to the token definitions. |
// SupportsState:
| 55 | // this is expected to be a mpl::bool_, if mpl::true_ the |
| 56 | // functor supports different lexer states, |
| 57 | // otherwise no lexer state is supported. |
| 58 | // |
| 59 | /////////////////////////////////////////////////////////////////////////// |
| 60 | template <typename Token |
| 61 | , template <typename, typename, typename, typename> class FunctorData |
| 62 | , typename Iterator = typename Token::iterator_type |
| 63 | , typename SupportsActors = mpl::false_ |
| 64 | , typename SupportsState = typename Token::has_state> |
| 65 | class functor |
| 66 | { |
| 67 | public: |
| 68 | typedef typename |
| 69 | std::iterator_traits<Iterator>::value_type |
| 70 | char_type; |
| 71 | |
| 72 | private: |
| 73 | // Needed by compilers not implementing the resolution to DR45. For |
| 74 | // reference, see |
| 75 | // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. |
| 76 | typedef typename Token::token_value_type token_value_type; |
| 77 | friend class FunctorData<Iterator, SupportsActors, SupportsState |
| 78 | , token_value_type>; |
| 79 | |
| 80 | #ifdef _MSC_VER |
| 81 | # pragma warning(push) |
| 82 | # pragma warning(disable: 4512) // assignment operator could not be generated. |
| 83 | #endif |
| 84 | // Helper template allowing to assign a value on exit |
| 85 | template <typename T> |
| 86 | struct assign_on_exit |
| 87 | { |
| 88 | assign_on_exit(T& dst, T const& src) |
| 89 | : dst_(dst), src_(src) {} |
| 90 | |
| 91 | ~assign_on_exit() |
| 92 | { |
| 93 | dst_ = src_; |
| 94 | } |
| 95 | |
| 96 | T& dst_; |
| 97 | T const& src_; |
| 98 | }; |
| 99 | #ifdef _MSC_VER |
| 100 | # pragma warning(pop) |
| 101 | #endif |
| 102 | |
| 103 | public: |
| 104 | functor() {} |
| 105 | |
| 106 | #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310) |
| 107 | // somehow VC7.1 needs this (meaningless) assignment operator |
| 108 | functor& operator=(functor const& rhs) |
| 109 | { |
| 110 | return *this; |
| 111 | } |
| 112 | #endif |
| 113 | |
| 114 | /////////////////////////////////////////////////////////////////////// |
| 115 | // interface to the iterator_policies::split_functor_input policy |
| 116 | typedef Token result_type; |
| 117 | typedef functor unique; |
| 118 | typedef FunctorData<Iterator, SupportsActors, SupportsState |
| 119 | , token_value_type> shared; |
| 120 | |
| 121 | BOOST_SPIRIT_EOF_PREFIX result_type const eof; |
| 122 | |
| 123 | /////////////////////////////////////////////////////////////////////// |
| 124 | typedef Iterator iterator_type; |
| 125 | typedef typename shared::semantic_actions_type semantic_actions_type; |
| 126 | typedef typename shared::next_token_functor next_token_functor; |
| 127 | typedef typename shared::get_state_name_type get_state_name_type; |
| 128 | |
| 129 | // this is needed to wrap the semantic actions in a proper way |
| 130 | typedef typename shared::wrap_action_type wrap_action_type; |
| 131 | |
| 132 | /////////////////////////////////////////////////////////////////////// |
| 133 | template <typename MultiPass> |
| 134 | static result_type& get_next(MultiPass& mp, result_type& result) |
| 135 | { |
| 136 | typedef typename result_type::id_type id_type; |
| 137 | |
| 138 | shared& data = mp.shared()->ftor; |
| 139 | for(;;) |
| 140 | { |
| 141 | if (data.get_first() == data.get_last()) |
| 142 | #if defined(BOOST_SPIRIT_STATIC_EOF) |
| 143 | return result = eof; |
| 144 | #else |
| 145 | return result = mp.ftor.eof; |
| 146 | #endif |
| 147 | |
| 148 | data.reset_value(); |
| 149 | Iterator end = data.get_first(); |
| 150 | std::size_t unique_id = boost::lexer::npos; |
| 151 | bool prev_bol = false; |
| 152 | |
| 153 | // lexer matching might change state |
| 154 | std::size_t state = data.get_state(); |
| 155 | std::size_t id = data.next(end, unique_id, prev_bol); |
| 156 | |
| 157 | if (boost::lexer::npos == id) { // no match |
| 158 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) |
| 159 | std::string next; |
| 160 | Iterator it = data.get_first(); |
| 161 | for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) |
| 162 | next += *it; |
| 163 | |
| 164 | std::cerr << "Not matched, in state: " << state |
| 165 | << ", lookahead: >" << next << "<" << std::endl; |
| 166 | #endif |
| 167 | return result = result_type(0); |
| 168 | } |
| 169 | else if (0 == id) { // EOF reached |
| 170 | #if defined(BOOST_SPIRIT_STATIC_EOF) |
| 171 | return result = eof; |
| 172 | #else |
| 173 | return result = mp.ftor.eof; |
| 174 | #endif |
| 175 | } |
| 176 | |
| 177 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) |
| 178 | { |
| 179 | std::string next; |
| 180 | Iterator it = end; |
| 181 | for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) |
| 182 | next += *it; |
| 183 | |
| 184 | std::cerr << "Matched: " << id << ", in state: " |
| 185 | << state << ", string: >" |
| 186 | << std::basic_string<char_type>(data.get_first(), end) << "<" |
| 187 | << ", lookahead: >" << next << "<" << std::endl; |
| 188 | if (data.get_state() != state) { |
| 189 | std::cerr << "Switched to state: " |
| 190 | << data.get_state() << std::endl; |
| 191 | } |
| 192 | } |
| 193 | #endif |
| 194 | // account for a possibly pending lex::more(), i.e. moving |
| 195 | // data.first_ back to the start of the previously matched token. |
| 196 | bool adjusted = data.adjust_start(); |
| 197 | |
| 198 | // set the end of the matched input sequence in the token data |
| 199 | data.set_end(end); |
| 200 | |
| 201 | // invoke attached semantic actions, if defined, might change |
| 202 | // state, id, data.first_, and/or end |
| 203 | BOOST_SCOPED_ENUM(pass_flags) pass = |
| 204 | data.invoke_actions(state, id, unique_id, end); |
| 205 | |
| 206 | if (data.has_value()) { |
| 207 | // return matched token using the token value as set before |
| 208 | // using data.set_value(), advancing 'data.first_' past the |
| 209 | // matched sequence |
| 210 | assign_on_exit<Iterator> on_exit(data.get_first(), end); |
| 211 | return result = result_type(id_type(id), state, data.get_value()); |
| 212 | } |
| 213 | else if (pass_flags::pass_normal == pass) { |
| 214 | // return matched token, advancing 'data.first_' past the |
| 215 | // matched sequence |
| 216 | assign_on_exit<Iterator> on_exit(data.get_first(), end); |
| 217 | return result = result_type(id_type(id), state, data.get_first(), end); |
| 218 | } |
| 219 | else if (pass_flags::pass_fail == pass) { |
| 220 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) |
| 221 | std::cerr << "Matching forced to fail" << std::endl; |
| 222 | #endif |
| 223 | // if the data.first_ got adjusted above, revert this adjustment |
| 224 | if (adjusted) |
| 225 | data.revert_adjust_start(); |
| 226 | |
| 227 | // one of the semantic actions signaled no-match |
| 228 | data.reset_bol(prev_bol); |
| 229 | if (state != data.get_state()) |
| 230 | continue; // retry matching if state has changed |
| 231 | |
| 232 | // if the state is unchanged repeating the match wouldn't |
| 233 | // move the input forward, causing an infinite loop |
| 234 | return result = result_type(0); |
| 235 | } |
| 236 | |
| 237 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) |
| 238 | std::cerr << "Token ignored, continuing matching" << std::endl; |
| 239 | #endif |
| 240 | // if this token needs to be ignored, just repeat the matching, |
| 241 | // while starting right after the current match |
| 242 | data.get_first() = end; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | // set_state are propagated up to the iterator interface, allowing to |
| 247 | // manipulate the current lexer state through any of the exposed |
| 248 | // iterators. |
| 249 | template <typename MultiPass> |
| 250 | static std::size_t set_state(MultiPass& mp, std::size_t state) |
| 251 | { |
| 252 | std::size_t oldstate = mp.shared()->ftor.get_state(); |
| 253 | mp.shared()->ftor.set_state(state); |
| 254 | |
| 255 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) |
| 256 | std::cerr << "Switching state from: " << oldstate |
| 257 | << " to: " << state |
| 258 | << std::endl; |
| 259 | #endif |
| 260 | return oldstate; |
| 261 | } |
| 262 | |
| 263 | template <typename MultiPass> |
| 264 | static std::size_t get_state(MultiPass& mp) |
| 265 | { |
| 266 | return mp.shared()->ftor.get_state(); |
| 267 | } |
| 268 | |
| 269 | template <typename MultiPass> |
| 270 | static std::size_t |
| 271 | map_state(MultiPass const& mp, char_type const* statename) |
| 272 | { |
| 273 | return mp.shared()->ftor.get_state_id(statename); |
| 274 | } |
| 275 | |
| 276 | // we don't need this, but it must be there |
| 277 | template <typename MultiPass> |
| 278 | static void destroy(MultiPass const&) {} |
| 279 | }; |
| 280 | |
| 281 | #if defined(BOOST_SPIRIT_STATIC_EOF) |
| 282 | /////////////////////////////////////////////////////////////////////////// |
| 283 | // eof token |
| 284 | /////////////////////////////////////////////////////////////////////////// |
| 285 | template <typename Token |
| 286 | , template <typename, typename, typename, typename> class FunctorData |
| 287 | , typename Iterator, typename SupportsActors, typename SupportsState> |
| 288 | typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const |
| 289 | functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof = |
| 290 | typename functor<Token, FunctorData, Iterator, SupportsActors |
| 291 | , SupportsState>::result_type(); |
| 292 | #endif |
| 293 | |
| 294 | }}}} |
| 295 | |
| 296 | #undef BOOST_SPIRIT_EOF_PREFIX |
| 297 | #undef BOOST_SPIRIT_STATIC_EOF |
| 298 | |
| 299 | #endif |
| 300 | |