1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
3 | |
4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. |
5 | * Use, modification and distribution is subject to the |
6 | * Boost Software License, Version 1.0. (See accompanying |
7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
8 | * Author: Jeff Garland, Bart Garst |
9 | * $Date$ |
10 | */ |
11 | |
12 | |
13 | #include "boost/lexical_cast.hpp" //error without? |
14 | #include "boost/algorithm/string/case_conv.hpp" |
15 | #include <map> |
16 | #include <string> |
17 | #include <vector> |
18 | #include <algorithm> |
19 | |
20 | namespace boost { namespace date_time { |
21 | |
22 | |
23 | template<typename charT> |
24 | struct parse_match_result |
25 | { |
26 | parse_match_result() : |
27 | match_depth(0), |
28 | current_match(-1)// -1 is match_not-found value |
29 | {} |
30 | typedef std::basic_string<charT> string_type; |
31 | string_type remaining() const |
32 | { |
33 | if (match_depth == cache.size()) { |
34 | return string_type(); |
35 | } |
36 | if (current_match == -1) { |
37 | return cache; |
38 | } |
39 | //some of the cache was used return the rest |
40 | return string_type(cache, match_depth); |
41 | } |
42 | charT last_char() const |
43 | { |
44 | return cache[cache.size()-1]; |
45 | } |
46 | //! Returns true if more characters were parsed than was necessary |
47 | /*! Should be used in conjunction with last_char() |
48 | * to get the remaining character. |
49 | */ |
50 | bool has_remaining() const |
51 | { |
52 | return (cache.size() > match_depth); |
53 | } |
54 | |
55 | // cache will hold characters that have been read from the stream |
56 | string_type cache; |
57 | unsigned short match_depth; |
58 | short current_match; |
59 | enum PARSE_STATE { PARSE_ERROR= -1 }; |
60 | }; |
61 | |
62 | //for debug -- really only char streams... |
63 | template<typename charT> |
64 | std::basic_ostream<charT>& |
65 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) |
66 | { |
67 | os << "cm: " << mr.current_match |
68 | << " C: '" << mr.cache |
69 | << "' md: " << mr.match_depth |
70 | << " R: " << mr.remaining(); |
71 | return os; |
72 | } |
73 | |
74 | |
75 | |
76 | //! Recursive data structure to allow efficient parsing of various strings |
77 | /*! This class provides a quick lookup by building what amounts to a |
78 | * tree data structure. It also features a match function which can |
79 | * can handle nasty input interators by caching values as it recurses |
80 | * the tree so that it can backtrack as needed. |
81 | */ |
82 | template<typename charT> |
83 | struct string_parse_tree |
84 | { |
85 | #if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) ) |
86 | typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll; |
87 | #else |
88 | typedef std::multimap<charT, string_parse_tree > ptree_coll; |
89 | #endif |
90 | typedef typename ptree_coll::value_type value_type; |
91 | typedef typename ptree_coll::iterator iterator; |
92 | typedef typename ptree_coll::const_iterator const_iterator; |
93 | typedef std::basic_string<charT> string_type; |
94 | typedef std::vector<std::basic_string<charT> > collection_type; |
95 | typedef parse_match_result<charT> parse_match_result_type; |
96 | |
97 | /*! Parameter "starting_point" designates where the numbering begins. |
98 | * A starting_point of zero will start the numbering at zero |
99 | * (Sun=0, Mon=1, ...) were a starting_point of one starts the |
100 | * numbering at one (Jan=1, Feb=2, ...). The default is zero, |
101 | * negative vaules are not allowed */ |
102 | string_parse_tree(collection_type names, unsigned int starting_point=0) |
103 | { |
104 | // iterate thru all the elements and build the tree |
105 | unsigned short index = 0; |
106 | while (index != names.size() ) { |
107 | string_type s = boost::algorithm::to_lower_copy(names[index]); |
108 | insert(s, value: static_cast<unsigned short>(index + starting_point)); |
109 | index++; |
110 | } |
111 | //set the last tree node = index+1 indicating a value |
112 | index++; |
113 | } |
114 | |
115 | |
116 | string_parse_tree(short value = -1) : |
117 | m_value(value) |
118 | {} |
119 | ptree_coll m_next_chars; |
120 | short m_value; |
121 | |
122 | void insert(const string_type& s, unsigned short value) |
123 | { |
124 | unsigned int i = 0; |
125 | iterator ti; |
126 | while(i < s.size()) { |
127 | if (i==0) { |
128 | if (i == (s.size()-1)) { |
129 | ti = m_next_chars.insert(value_type(s[i], |
130 | string_parse_tree<charT>(value))); |
131 | } |
132 | else { |
133 | ti = m_next_chars.insert(value_type(s[i], |
134 | string_parse_tree<charT>())); |
135 | } |
136 | } |
137 | else { |
138 | if (i == (s.size()-1)) { |
139 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
140 | string_parse_tree<charT>(value))); |
141 | } |
142 | |
143 | else { |
144 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
145 | string_parse_tree<charT>())); |
146 | } |
147 | |
148 | } |
149 | i++; |
150 | } |
151 | } |
152 | |
153 | |
154 | //! Recursive function that finds a matching string in the tree. |
155 | /*! Must check match_results::has_remaining() after match() is |
156 | * called. This is required so the user can determine if |
157 | * stream iterator is already pointing to the expected |
158 | * character or not (match() might advance sitr to next char in stream). |
159 | * |
160 | * A parse_match_result that has been returned from a failed match |
161 | * attempt can be sent in to the match function of a different |
162 | * string_parse_tree to attempt a match there. Use the iterators |
163 | * for the partially consumed stream, the parse_match_result object, |
164 | * and '0' for the level parameter. */ |
165 | short |
166 | match(std::istreambuf_iterator<charT>& sitr, |
167 | std::istreambuf_iterator<charT>& stream_end, |
168 | parse_match_result_type& result, |
169 | unsigned int& level) const |
170 | { |
171 | |
172 | level++; |
173 | charT c; |
174 | // if we conditionally advance sitr, we won't have |
175 | // to consume the next character past the input |
176 | bool adv_itr = true; |
177 | if (level > result.cache.size()) { |
178 | if (sitr == stream_end) return 0; //bail - input exhausted |
179 | c = static_cast<charT>(std::tolower(*sitr)); |
180 | //result.cache += c; |
181 | //sitr++; |
182 | } |
183 | else { |
184 | // if we're looking for characters from the cache, |
185 | // we don't want to increment sitr |
186 | adv_itr = false; |
187 | c = static_cast<charT>(std::tolower(result.cache[level-1])); |
188 | } |
189 | const_iterator litr = m_next_chars.lower_bound(c); |
190 | const_iterator uitr = m_next_chars.upper_bound(c); |
191 | while (litr != uitr) { // equal if not found |
192 | if(adv_itr) { |
193 | sitr++; |
194 | result.cache += c; |
195 | } |
196 | if (litr->second.m_value != -1) { // -1 is default value |
197 | if (result.match_depth < level) { |
198 | result.current_match = litr->second.m_value; |
199 | result.match_depth = static_cast<unsigned short>(level); |
200 | } |
201 | litr->second.match(sitr, stream_end, |
202 | result, level); |
203 | level--; |
204 | } |
205 | else { |
206 | litr->second.match(sitr, stream_end, |
207 | result, level); |
208 | level--; |
209 | } |
210 | |
211 | if(level <= result.cache.size()) { |
212 | adv_itr = false; |
213 | } |
214 | |
215 | litr++; |
216 | } |
217 | return result.current_match; |
218 | |
219 | } |
220 | |
221 | /*! Must check match_results::has_remaining() after match() is |
222 | * called. This is required so the user can determine if |
223 | * stream iterator is already pointing to the expected |
224 | * character or not (match() might advance sitr to next char in stream). |
225 | */ |
226 | parse_match_result_type |
227 | match(std::istreambuf_iterator<charT>& sitr, |
228 | std::istreambuf_iterator<charT>& stream_end) const |
229 | { |
230 | // lookup to_lower of char in tree. |
231 | unsigned int level = 0; |
232 | // string_type cache; |
233 | parse_match_result_type result; |
234 | match(sitr, stream_end, result, level); |
235 | return result; |
236 | } |
237 | |
238 | void printme(std::ostream& os, int& level) |
239 | { |
240 | level++; |
241 | iterator itr = m_next_chars.begin(); |
242 | iterator end = m_next_chars.end(); |
243 | // os << "starting level: " << level << std::endl; |
244 | while (itr != end) { |
245 | os << "level: " << level |
246 | << " node: " << itr->first |
247 | << " value: " << itr->second.m_value |
248 | << std::endl; |
249 | itr->second.printme(os, level); |
250 | itr++; |
251 | } |
252 | level--; |
253 | } |
254 | |
255 | void print(std::ostream& os) |
256 | { |
257 | int level = 0; |
258 | printme(os, level); |
259 | } |
260 | |
261 | void printmatch(std::ostream& os, charT c) |
262 | { |
263 | iterator litr = m_next_chars.lower_bound(c); |
264 | iterator uitr = m_next_chars.upper_bound(c); |
265 | os << "matches for: " << c << std::endl; |
266 | while (litr != uitr) { |
267 | os << " node: " << litr->first |
268 | << " value: " << litr->second.m_value |
269 | << std::endl; |
270 | litr++; |
271 | } |
272 | } |
273 | |
274 | }; |
275 | |
276 | |
277 | } } //namespace |
278 | #endif |
279 | |