| 1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
| 2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
| 3 | |
| 4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. |
| 5 | * Use, modification and distribution is subject to the |
| 6 | * Boost Software License, Version 1.0. (See accompanying |
| 7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
| 8 | * Author: Jeff Garland, Bart Garst |
| 9 | * $Date$ |
| 10 | */ |
| 11 | |
| 12 | |
| 13 | #include <boost/algorithm/string/case_conv.hpp> |
| 14 | #include <cctype> |
| 15 | #include <map> |
| 16 | #include <string> |
| 17 | #include <vector> |
| 18 | #include <ostream> |
| 19 | #include <iterator> |
| 20 | #include <algorithm> |
| 21 | |
| 22 | namespace boost { namespace date_time { |
| 23 | |
| 24 | |
| 25 | template<typename charT> |
| 26 | struct parse_match_result |
| 27 | { |
| 28 | parse_match_result() : |
| 29 | match_depth(0), |
| 30 | current_match(PARSE_ERROR) |
| 31 | {} |
| 32 | typedef std::basic_string<charT> string_type; |
| 33 | string_type remaining() const |
| 34 | { |
| 35 | if (match_depth == cache.size()) { |
| 36 | return string_type(); |
| 37 | } |
| 38 | if (current_match == PARSE_ERROR) { |
| 39 | return cache; |
| 40 | } |
| 41 | //some of the cache was used return the rest |
| 42 | return string_type(cache, match_depth); |
| 43 | } |
| 44 | charT last_char() const |
| 45 | { |
| 46 | return cache[cache.size()-1]; |
| 47 | } |
| 48 | //! Returns true if more characters were parsed than was necessary |
| 49 | /*! Should be used in conjunction with last_char() |
| 50 | * to get the remaining character. |
| 51 | */ |
| 52 | bool has_remaining() const |
| 53 | { |
| 54 | return (cache.size() > match_depth); |
| 55 | } |
| 56 | |
| 57 | // cache will hold characters that have been read from the stream |
| 58 | string_type cache; |
| 59 | unsigned short match_depth; |
| 60 | short current_match; |
| 61 | enum PARSE_STATE { PARSE_ERROR = -1 }; |
| 62 | }; |
| 63 | |
| 64 | //for debug -- really only char streams... |
| 65 | template<typename charT> |
| 66 | std::basic_ostream<charT>& |
| 67 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) |
| 68 | { |
| 69 | os << "cm: " << mr.current_match |
| 70 | << " C: '" << mr.cache |
| 71 | << "' md: " << mr.match_depth |
| 72 | << " R: " << mr.remaining(); |
| 73 | return os; |
| 74 | } |
| 75 | |
| 76 | |
| 77 | |
| 78 | //! Recursive data structure to allow efficient parsing of various strings |
| 79 | /*! This class provides a quick lookup by building what amounts to a |
| 80 | * tree data structure. It also features a match function which can |
| 81 | * can handle nasty input interators by caching values as it recurses |
| 82 | * the tree so that it can backtrack as needed. |
| 83 | */ |
| 84 | template<typename charT> |
| 85 | struct string_parse_tree |
| 86 | { |
| 87 | #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) ) |
| 88 | typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll; |
| 89 | #else |
| 90 | typedef std::multimap<charT, string_parse_tree > ptree_coll; |
| 91 | #endif |
| 92 | typedef typename ptree_coll::value_type value_type; |
| 93 | typedef typename ptree_coll::iterator iterator; |
| 94 | typedef typename ptree_coll::const_iterator const_iterator; |
| 95 | typedef std::basic_string<charT> string_type; |
| 96 | typedef std::vector<std::basic_string<charT> > collection_type; |
| 97 | typedef parse_match_result<charT> parse_match_result_type; |
| 98 | |
| 99 | /*! Parameter "starting_point" designates where the numbering begins. |
| 100 | * A starting_point of zero will start the numbering at zero |
| 101 | * (Sun=0, Mon=1, ...) were a starting_point of one starts the |
| 102 | * numbering at one (Jan=1, Feb=2, ...). The default is zero, |
| 103 | * negative vaules are not allowed */ |
| 104 | string_parse_tree(collection_type names, unsigned int starting_point=0) : |
| 105 | m_value(parse_match_result_type::PARSE_ERROR) |
| 106 | { |
| 107 | // iterate thru all the elements and build the tree |
| 108 | unsigned short index = 0; |
| 109 | while (index != names.size() ) { |
| 110 | string_type s = boost::algorithm::to_lower_copy(names[index]); |
| 111 | insert(s, value: static_cast<unsigned short>(index + starting_point)); |
| 112 | index++; |
| 113 | } |
| 114 | //set the last tree node = index+1 indicating a value |
| 115 | index++; |
| 116 | } |
| 117 | |
| 118 | |
| 119 | string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) : |
| 120 | m_value(value) |
| 121 | {} |
| 122 | ptree_coll m_next_chars; |
| 123 | short m_value; |
| 124 | |
| 125 | void insert(const string_type& s, unsigned short value) |
| 126 | { |
| 127 | unsigned int i = 0; |
| 128 | iterator ti; |
| 129 | while(i < s.size()) { |
| 130 | if (i==0) { |
| 131 | if (i == (s.size()-1)) { |
| 132 | ti = m_next_chars.insert(value_type(s[i], |
| 133 | string_parse_tree<charT>(value))); |
| 134 | } |
| 135 | else { |
| 136 | ti = m_next_chars.insert(value_type(s[i], |
| 137 | string_parse_tree<charT>())); |
| 138 | } |
| 139 | } |
| 140 | else { |
| 141 | if (i == (s.size()-1)) { |
| 142 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
| 143 | string_parse_tree<charT>(value))); |
| 144 | } |
| 145 | |
| 146 | else { |
| 147 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
| 148 | string_parse_tree<charT>())); |
| 149 | } |
| 150 | |
| 151 | } |
| 152 | i++; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | |
| 157 | //! Recursive function that finds a matching string in the tree. |
| 158 | /*! Must check match_results::has_remaining() after match() is |
| 159 | * called. This is required so the user can determine if |
| 160 | * stream iterator is already pointing to the expected |
| 161 | * character or not (match() might advance sitr to next char in stream). |
| 162 | * |
| 163 | * A parse_match_result that has been returned from a failed match |
| 164 | * attempt can be sent in to the match function of a different |
| 165 | * string_parse_tree to attempt a match there. Use the iterators |
| 166 | * for the partially consumed stream, the parse_match_result object, |
| 167 | * and '0' for the level parameter. */ |
| 168 | short |
| 169 | match(std::istreambuf_iterator<charT>& sitr, |
| 170 | std::istreambuf_iterator<charT>& stream_end, |
| 171 | parse_match_result_type& result, |
| 172 | unsigned int& level) const |
| 173 | { |
| 174 | |
| 175 | level++; |
| 176 | charT c; |
| 177 | // if we conditionally advance sitr, we won't have |
| 178 | // to consume the next character past the input |
| 179 | bool adv_itr = true; |
| 180 | if (level > result.cache.size()) { |
| 181 | if (sitr == stream_end) return 0; //bail - input exhausted |
| 182 | c = static_cast<charT>(std::tolower(*sitr)); |
| 183 | //result.cache += c; |
| 184 | //sitr++; |
| 185 | } |
| 186 | else { |
| 187 | // if we're looking for characters from the cache, |
| 188 | // we don't want to increment sitr |
| 189 | adv_itr = false; |
| 190 | c = static_cast<charT>(std::tolower(result.cache[level-1])); |
| 191 | } |
| 192 | const_iterator litr = m_next_chars.lower_bound(c); |
| 193 | const_iterator uitr = m_next_chars.upper_bound(c); |
| 194 | while (litr != uitr) { // equal if not found |
| 195 | if(adv_itr) { |
| 196 | sitr++; |
| 197 | result.cache += c; |
| 198 | } |
| 199 | if (litr->second.m_value != -1) { // -1 is default value |
| 200 | if (result.match_depth < level) { |
| 201 | result.current_match = litr->second.m_value; |
| 202 | result.match_depth = static_cast<unsigned short>(level); |
| 203 | } |
| 204 | litr->second.match(sitr, stream_end, |
| 205 | result, level); |
| 206 | level--; |
| 207 | } |
| 208 | else { |
| 209 | litr->second.match(sitr, stream_end, |
| 210 | result, level); |
| 211 | level--; |
| 212 | } |
| 213 | |
| 214 | if(level <= result.cache.size()) { |
| 215 | adv_itr = false; |
| 216 | } |
| 217 | |
| 218 | litr++; |
| 219 | } |
| 220 | return result.current_match; |
| 221 | |
| 222 | } |
| 223 | |
| 224 | /*! Must check match_results::has_remaining() after match() is |
| 225 | * called. This is required so the user can determine if |
| 226 | * stream iterator is already pointing to the expected |
| 227 | * character or not (match() might advance sitr to next char in stream). |
| 228 | */ |
| 229 | parse_match_result_type |
| 230 | match(std::istreambuf_iterator<charT>& sitr, |
| 231 | std::istreambuf_iterator<charT>& stream_end) const |
| 232 | { |
| 233 | // lookup to_lower of char in tree. |
| 234 | unsigned int level = 0; |
| 235 | // string_type cache; |
| 236 | parse_match_result_type result; |
| 237 | match(sitr, stream_end, result, level); |
| 238 | return result; |
| 239 | } |
| 240 | |
| 241 | void printme(std::ostream& os, int& level) |
| 242 | { |
| 243 | level++; |
| 244 | iterator itr = m_next_chars.begin(); |
| 245 | iterator end = m_next_chars.end(); |
| 246 | // os << "starting level: " << level << std::endl; |
| 247 | while (itr != end) { |
| 248 | os << "level: " << level |
| 249 | << " node: " << itr->first |
| 250 | << " value: " << itr->second.m_value |
| 251 | << std::endl; |
| 252 | itr->second.printme(os, level); |
| 253 | itr++; |
| 254 | } |
| 255 | level--; |
| 256 | } |
| 257 | |
| 258 | void print(std::ostream& os) |
| 259 | { |
| 260 | int level = 0; |
| 261 | printme(os, level); |
| 262 | } |
| 263 | |
| 264 | void printmatch(std::ostream& os, charT c) |
| 265 | { |
| 266 | iterator litr = m_next_chars.lower_bound(c); |
| 267 | iterator uitr = m_next_chars.upper_bound(c); |
| 268 | os << "matches for: " << c << std::endl; |
| 269 | while (litr != uitr) { |
| 270 | os << " node: " << litr->first |
| 271 | << " value: " << litr->second.m_value |
| 272 | << std::endl; |
| 273 | litr++; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | }; |
| 278 | |
| 279 | |
| 280 | } } //namespace |
| 281 | #endif |
| 282 | |