1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
3 | |
4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. |
5 | * Use, modification and distribution is subject to the |
6 | * Boost Software License, Version 1.0. (See accompanying |
7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
8 | * Author: Jeff Garland, Bart Garst |
9 | * $Date$ |
10 | */ |
11 | |
12 | |
13 | #include <boost/algorithm/string/case_conv.hpp> |
14 | #include <cctype> |
15 | #include <map> |
16 | #include <string> |
17 | #include <vector> |
18 | #include <ostream> |
19 | #include <iterator> |
20 | #include <algorithm> |
21 | |
22 | namespace boost { namespace date_time { |
23 | |
24 | |
25 | template<typename charT> |
26 | struct parse_match_result |
27 | { |
28 | parse_match_result() : |
29 | match_depth(0), |
30 | current_match(PARSE_ERROR) |
31 | {} |
32 | typedef std::basic_string<charT> string_type; |
33 | string_type remaining() const |
34 | { |
35 | if (match_depth == cache.size()) { |
36 | return string_type(); |
37 | } |
38 | if (current_match == PARSE_ERROR) { |
39 | return cache; |
40 | } |
41 | //some of the cache was used return the rest |
42 | return string_type(cache, match_depth); |
43 | } |
44 | charT last_char() const |
45 | { |
46 | return cache[cache.size()-1]; |
47 | } |
48 | //! Returns true if more characters were parsed than was necessary |
49 | /*! Should be used in conjunction with last_char() |
50 | * to get the remaining character. |
51 | */ |
52 | bool has_remaining() const |
53 | { |
54 | return (cache.size() > match_depth); |
55 | } |
56 | |
57 | // cache will hold characters that have been read from the stream |
58 | string_type cache; |
59 | unsigned short match_depth; |
60 | short current_match; |
61 | enum PARSE_STATE { PARSE_ERROR = -1 }; |
62 | }; |
63 | |
64 | //for debug -- really only char streams... |
65 | template<typename charT> |
66 | std::basic_ostream<charT>& |
67 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) |
68 | { |
69 | os << "cm: " << mr.current_match |
70 | << " C: '" << mr.cache |
71 | << "' md: " << mr.match_depth |
72 | << " R: " << mr.remaining(); |
73 | return os; |
74 | } |
75 | |
76 | |
77 | |
78 | //! Recursive data structure to allow efficient parsing of various strings |
79 | /*! This class provides a quick lookup by building what amounts to a |
80 | * tree data structure. It also features a match function which can |
81 | * can handle nasty input interators by caching values as it recurses |
82 | * the tree so that it can backtrack as needed. |
83 | */ |
84 | template<typename charT> |
85 | struct string_parse_tree |
86 | { |
87 | #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) ) |
88 | typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll; |
89 | #else |
90 | typedef std::multimap<charT, string_parse_tree > ptree_coll; |
91 | #endif |
92 | typedef typename ptree_coll::value_type value_type; |
93 | typedef typename ptree_coll::iterator iterator; |
94 | typedef typename ptree_coll::const_iterator const_iterator; |
95 | typedef std::basic_string<charT> string_type; |
96 | typedef std::vector<std::basic_string<charT> > collection_type; |
97 | typedef parse_match_result<charT> parse_match_result_type; |
98 | |
99 | /*! Parameter "starting_point" designates where the numbering begins. |
100 | * A starting_point of zero will start the numbering at zero |
101 | * (Sun=0, Mon=1, ...) were a starting_point of one starts the |
102 | * numbering at one (Jan=1, Feb=2, ...). The default is zero, |
103 | * negative vaules are not allowed */ |
104 | string_parse_tree(collection_type names, unsigned int starting_point=0) : |
105 | m_value(parse_match_result_type::PARSE_ERROR) |
106 | { |
107 | // iterate thru all the elements and build the tree |
108 | unsigned short index = 0; |
109 | while (index != names.size() ) { |
110 | string_type s = boost::algorithm::to_lower_copy(names[index]); |
111 | insert(s, value: static_cast<unsigned short>(index + starting_point)); |
112 | index++; |
113 | } |
114 | //set the last tree node = index+1 indicating a value |
115 | index++; |
116 | } |
117 | |
118 | |
119 | string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) : |
120 | m_value(value) |
121 | {} |
122 | ptree_coll m_next_chars; |
123 | short m_value; |
124 | |
125 | void insert(const string_type& s, unsigned short value) |
126 | { |
127 | unsigned int i = 0; |
128 | iterator ti; |
129 | while(i < s.size()) { |
130 | if (i==0) { |
131 | if (i == (s.size()-1)) { |
132 | ti = m_next_chars.insert(value_type(s[i], |
133 | string_parse_tree<charT>(value))); |
134 | } |
135 | else { |
136 | ti = m_next_chars.insert(value_type(s[i], |
137 | string_parse_tree<charT>())); |
138 | } |
139 | } |
140 | else { |
141 | if (i == (s.size()-1)) { |
142 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
143 | string_parse_tree<charT>(value))); |
144 | } |
145 | |
146 | else { |
147 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
148 | string_parse_tree<charT>())); |
149 | } |
150 | |
151 | } |
152 | i++; |
153 | } |
154 | } |
155 | |
156 | |
157 | //! Recursive function that finds a matching string in the tree. |
158 | /*! Must check match_results::has_remaining() after match() is |
159 | * called. This is required so the user can determine if |
160 | * stream iterator is already pointing to the expected |
161 | * character or not (match() might advance sitr to next char in stream). |
162 | * |
163 | * A parse_match_result that has been returned from a failed match |
164 | * attempt can be sent in to the match function of a different |
165 | * string_parse_tree to attempt a match there. Use the iterators |
166 | * for the partially consumed stream, the parse_match_result object, |
167 | * and '0' for the level parameter. */ |
168 | short |
169 | match(std::istreambuf_iterator<charT>& sitr, |
170 | std::istreambuf_iterator<charT>& stream_end, |
171 | parse_match_result_type& result, |
172 | unsigned int& level) const |
173 | { |
174 | |
175 | level++; |
176 | charT c; |
177 | // if we conditionally advance sitr, we won't have |
178 | // to consume the next character past the input |
179 | bool adv_itr = true; |
180 | if (level > result.cache.size()) { |
181 | if (sitr == stream_end) return 0; //bail - input exhausted |
182 | c = static_cast<charT>(std::tolower(*sitr)); |
183 | //result.cache += c; |
184 | //sitr++; |
185 | } |
186 | else { |
187 | // if we're looking for characters from the cache, |
188 | // we don't want to increment sitr |
189 | adv_itr = false; |
190 | c = static_cast<charT>(std::tolower(result.cache[level-1])); |
191 | } |
192 | const_iterator litr = m_next_chars.lower_bound(c); |
193 | const_iterator uitr = m_next_chars.upper_bound(c); |
194 | while (litr != uitr) { // equal if not found |
195 | if(adv_itr) { |
196 | sitr++; |
197 | result.cache += c; |
198 | } |
199 | if (litr->second.m_value != -1) { // -1 is default value |
200 | if (result.match_depth < level) { |
201 | result.current_match = litr->second.m_value; |
202 | result.match_depth = static_cast<unsigned short>(level); |
203 | } |
204 | litr->second.match(sitr, stream_end, |
205 | result, level); |
206 | level--; |
207 | } |
208 | else { |
209 | litr->second.match(sitr, stream_end, |
210 | result, level); |
211 | level--; |
212 | } |
213 | |
214 | if(level <= result.cache.size()) { |
215 | adv_itr = false; |
216 | } |
217 | |
218 | litr++; |
219 | } |
220 | return result.current_match; |
221 | |
222 | } |
223 | |
224 | /*! Must check match_results::has_remaining() after match() is |
225 | * called. This is required so the user can determine if |
226 | * stream iterator is already pointing to the expected |
227 | * character or not (match() might advance sitr to next char in stream). |
228 | */ |
229 | parse_match_result_type |
230 | match(std::istreambuf_iterator<charT>& sitr, |
231 | std::istreambuf_iterator<charT>& stream_end) const |
232 | { |
233 | // lookup to_lower of char in tree. |
234 | unsigned int level = 0; |
235 | // string_type cache; |
236 | parse_match_result_type result; |
237 | match(sitr, stream_end, result, level); |
238 | return result; |
239 | } |
240 | |
241 | void printme(std::ostream& os, int& level) |
242 | { |
243 | level++; |
244 | iterator itr = m_next_chars.begin(); |
245 | iterator end = m_next_chars.end(); |
246 | // os << "starting level: " << level << std::endl; |
247 | while (itr != end) { |
248 | os << "level: " << level |
249 | << " node: " << itr->first |
250 | << " value: " << itr->second.m_value |
251 | << std::endl; |
252 | itr->second.printme(os, level); |
253 | itr++; |
254 | } |
255 | level--; |
256 | } |
257 | |
258 | void print(std::ostream& os) |
259 | { |
260 | int level = 0; |
261 | printme(os, level); |
262 | } |
263 | |
264 | void printmatch(std::ostream& os, charT c) |
265 | { |
266 | iterator litr = m_next_chars.lower_bound(c); |
267 | iterator uitr = m_next_chars.upper_bound(c); |
268 | os << "matches for: " << c << std::endl; |
269 | while (litr != uitr) { |
270 | os << " node: " << litr->first |
271 | << " value: " << litr->second.m_value |
272 | << std::endl; |
273 | litr++; |
274 | } |
275 | } |
276 | |
277 | }; |
278 | |
279 | |
280 | } } //namespace |
281 | #endif |
282 | |