1#ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2#define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
3
4/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5 * Use, modification and distribution is subject to the
6 * Boost Software License, Version 1.0. (See accompanying
7 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8 * Author: Jeff Garland, Bart Garst
9 * $Date$
10 */
11
12
13#include <boost/algorithm/string/case_conv.hpp>
14#include <cctype>
15#include <map>
16#include <string>
17#include <vector>
18#include <ostream>
19#include <iterator>
20#include <algorithm>
21
22namespace boost { namespace date_time {
23
24
25template<typename charT>
26struct parse_match_result
27{
28 parse_match_result() :
29 match_depth(0),
30 current_match(PARSE_ERROR)
31 {}
32 typedef std::basic_string<charT> string_type;
33 string_type remaining() const
34 {
35 if (match_depth == cache.size()) {
36 return string_type();
37 }
38 if (current_match == PARSE_ERROR) {
39 return cache;
40 }
41 //some of the cache was used return the rest
42 return string_type(cache, match_depth);
43 }
44 charT last_char() const
45 {
46 return cache[cache.size()-1];
47 }
48 //! Returns true if more characters were parsed than was necessary
49 /*! Should be used in conjunction with last_char()
50 * to get the remaining character.
51 */
52 bool has_remaining() const
53 {
54 return (cache.size() > match_depth);
55 }
56
57 // cache will hold characters that have been read from the stream
58 string_type cache;
59 unsigned short match_depth;
60 short current_match;
61 enum PARSE_STATE { PARSE_ERROR = -1 };
62};
63
64 //for debug -- really only char streams...
65template<typename charT>
66std::basic_ostream<charT>&
67operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
68{
69 os << "cm: " << mr.current_match
70 << " C: '" << mr.cache
71 << "' md: " << mr.match_depth
72 << " R: " << mr.remaining();
73 return os;
74}
75
76
77
78//! Recursive data structure to allow efficient parsing of various strings
79/*! This class provides a quick lookup by building what amounts to a
80 * tree data structure. It also features a match function which can
81 * can handle nasty input interators by caching values as it recurses
82 * the tree so that it can backtrack as needed.
83 */
84template<typename charT>
85struct string_parse_tree
86{
87#if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) )
88 typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
89#else
90 typedef std::multimap<charT, string_parse_tree > ptree_coll;
91#endif
92 typedef typename ptree_coll::value_type value_type;
93 typedef typename ptree_coll::iterator iterator;
94 typedef typename ptree_coll::const_iterator const_iterator;
95 typedef std::basic_string<charT> string_type;
96 typedef std::vector<std::basic_string<charT> > collection_type;
97 typedef parse_match_result<charT> parse_match_result_type;
98
99 /*! Parameter "starting_point" designates where the numbering begins.
100 * A starting_point of zero will start the numbering at zero
101 * (Sun=0, Mon=1, ...) were a starting_point of one starts the
102 * numbering at one (Jan=1, Feb=2, ...). The default is zero,
103 * negative vaules are not allowed */
104 string_parse_tree(collection_type names, unsigned int starting_point=0) :
105 m_value(parse_match_result_type::PARSE_ERROR)
106 {
107 // iterate thru all the elements and build the tree
108 unsigned short index = 0;
109 while (index != names.size() ) {
110 string_type s = boost::algorithm::to_lower_copy(names[index]);
111 insert(s, value: static_cast<unsigned short>(index + starting_point));
112 index++;
113 }
114 //set the last tree node = index+1 indicating a value
115 index++;
116 }
117
118
119 string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) :
120 m_value(value)
121 {}
122 ptree_coll m_next_chars;
123 short m_value;
124
125 void insert(const string_type& s, unsigned short value)
126 {
127 unsigned int i = 0;
128 iterator ti;
129 while(i < s.size()) {
130 if (i==0) {
131 if (i == (s.size()-1)) {
132 ti = m_next_chars.insert(value_type(s[i],
133 string_parse_tree<charT>(value)));
134 }
135 else {
136 ti = m_next_chars.insert(value_type(s[i],
137 string_parse_tree<charT>()));
138 }
139 }
140 else {
141 if (i == (s.size()-1)) {
142 ti = ti->second.m_next_chars.insert(value_type(s[i],
143 string_parse_tree<charT>(value)));
144 }
145
146 else {
147 ti = ti->second.m_next_chars.insert(value_type(s[i],
148 string_parse_tree<charT>()));
149 }
150
151 }
152 i++;
153 }
154 }
155
156
157 //! Recursive function that finds a matching string in the tree.
158 /*! Must check match_results::has_remaining() after match() is
159 * called. This is required so the user can determine if
160 * stream iterator is already pointing to the expected
161 * character or not (match() might advance sitr to next char in stream).
162 *
163 * A parse_match_result that has been returned from a failed match
164 * attempt can be sent in to the match function of a different
165 * string_parse_tree to attempt a match there. Use the iterators
166 * for the partially consumed stream, the parse_match_result object,
167 * and '0' for the level parameter. */
168 short
169 match(std::istreambuf_iterator<charT>& sitr,
170 std::istreambuf_iterator<charT>& stream_end,
171 parse_match_result_type& result,
172 unsigned int& level) const
173 {
174
175 level++;
176 charT c;
177 // if we conditionally advance sitr, we won't have
178 // to consume the next character past the input
179 bool adv_itr = true;
180 if (level > result.cache.size()) {
181 if (sitr == stream_end) return 0; //bail - input exhausted
182 c = static_cast<charT>(std::tolower(*sitr));
183 //result.cache += c;
184 //sitr++;
185 }
186 else {
187 // if we're looking for characters from the cache,
188 // we don't want to increment sitr
189 adv_itr = false;
190 c = static_cast<charT>(std::tolower(result.cache[level-1]));
191 }
192 const_iterator litr = m_next_chars.lower_bound(c);
193 const_iterator uitr = m_next_chars.upper_bound(c);
194 while (litr != uitr) { // equal if not found
195 if(adv_itr) {
196 sitr++;
197 result.cache += c;
198 }
199 if (litr->second.m_value != -1) { // -1 is default value
200 if (result.match_depth < level) {
201 result.current_match = litr->second.m_value;
202 result.match_depth = static_cast<unsigned short>(level);
203 }
204 litr->second.match(sitr, stream_end,
205 result, level);
206 level--;
207 }
208 else {
209 litr->second.match(sitr, stream_end,
210 result, level);
211 level--;
212 }
213
214 if(level <= result.cache.size()) {
215 adv_itr = false;
216 }
217
218 litr++;
219 }
220 return result.current_match;
221
222 }
223
224 /*! Must check match_results::has_remaining() after match() is
225 * called. This is required so the user can determine if
226 * stream iterator is already pointing to the expected
227 * character or not (match() might advance sitr to next char in stream).
228 */
229 parse_match_result_type
230 match(std::istreambuf_iterator<charT>& sitr,
231 std::istreambuf_iterator<charT>& stream_end) const
232 {
233 // lookup to_lower of char in tree.
234 unsigned int level = 0;
235 // string_type cache;
236 parse_match_result_type result;
237 match(sitr, stream_end, result, level);
238 return result;
239 }
240
241 void printme(std::ostream& os, int& level)
242 {
243 level++;
244 iterator itr = m_next_chars.begin();
245 iterator end = m_next_chars.end();
246 // os << "starting level: " << level << std::endl;
247 while (itr != end) {
248 os << "level: " << level
249 << " node: " << itr->first
250 << " value: " << itr->second.m_value
251 << std::endl;
252 itr->second.printme(os, level);
253 itr++;
254 }
255 level--;
256 }
257
258 void print(std::ostream& os)
259 {
260 int level = 0;
261 printme(os, level);
262 }
263
264 void printmatch(std::ostream& os, charT c)
265 {
266 iterator litr = m_next_chars.lower_bound(c);
267 iterator uitr = m_next_chars.upper_bound(c);
268 os << "matches for: " << c << std::endl;
269 while (litr != uitr) {
270 os << " node: " << litr->first
271 << " value: " << litr->second.m_value
272 << std::endl;
273 litr++;
274 }
275 }
276
277};
278
279
280} } //namespace
281#endif
282

source code of include/boost/date_time/string_parse_tree.hpp