1#ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2#define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
3
4/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5 * Use, modification and distribution is subject to the
6 * Boost Software License, Version 1.0. (See accompanying
7 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8 * Author: Jeff Garland, Bart Garst
9 * $Date$
10 */
11
12
13#include "boost/lexical_cast.hpp" //error without?
14#include "boost/algorithm/string/case_conv.hpp"
15#include <map>
16#include <string>
17#include <vector>
18#include <algorithm>
19
20namespace boost { namespace date_time {
21
22
23template<typename charT>
24struct parse_match_result
25{
26 parse_match_result() :
27 match_depth(0),
28 current_match(-1)// -1 is match_not-found value
29 {}
30 typedef std::basic_string<charT> string_type;
31 string_type remaining() const
32 {
33 if (match_depth == cache.size()) {
34 return string_type();
35 }
36 if (current_match == -1) {
37 return cache;
38 }
39 //some of the cache was used return the rest
40 return string_type(cache, match_depth);
41 }
42 charT last_char() const
43 {
44 return cache[cache.size()-1];
45 }
46 //! Returns true if more characters were parsed than was necessary
47 /*! Should be used in conjunction with last_char()
48 * to get the remaining character.
49 */
50 bool has_remaining() const
51 {
52 return (cache.size() > match_depth);
53 }
54
55 // cache will hold characters that have been read from the stream
56 string_type cache;
57 unsigned short match_depth;
58 short current_match;
59 enum PARSE_STATE { PARSE_ERROR= -1 };
60};
61
62 //for debug -- really only char streams...
63template<typename charT>
64std::basic_ostream<charT>&
65operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
66{
67 os << "cm: " << mr.current_match
68 << " C: '" << mr.cache
69 << "' md: " << mr.match_depth
70 << " R: " << mr.remaining();
71 return os;
72}
73
74
75
76//! Recursive data structure to allow efficient parsing of various strings
77/*! This class provides a quick lookup by building what amounts to a
78 * tree data structure. It also features a match function which can
79 * can handle nasty input interators by caching values as it recurses
80 * the tree so that it can backtrack as needed.
81 */
82template<typename charT>
83struct string_parse_tree
84{
85#if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) )
86 typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
87#else
88 typedef std::multimap<charT, string_parse_tree > ptree_coll;
89#endif
90 typedef typename ptree_coll::value_type value_type;
91 typedef typename ptree_coll::iterator iterator;
92 typedef typename ptree_coll::const_iterator const_iterator;
93 typedef std::basic_string<charT> string_type;
94 typedef std::vector<std::basic_string<charT> > collection_type;
95 typedef parse_match_result<charT> parse_match_result_type;
96
97 /*! Parameter "starting_point" designates where the numbering begins.
98 * A starting_point of zero will start the numbering at zero
99 * (Sun=0, Mon=1, ...) were a starting_point of one starts the
100 * numbering at one (Jan=1, Feb=2, ...). The default is zero,
101 * negative vaules are not allowed */
102 string_parse_tree(collection_type names, unsigned int starting_point=0)
103 {
104 // iterate thru all the elements and build the tree
105 unsigned short index = 0;
106 while (index != names.size() ) {
107 string_type s = boost::algorithm::to_lower_copy(names[index]);
108 insert(s, value: static_cast<unsigned short>(index + starting_point));
109 index++;
110 }
111 //set the last tree node = index+1 indicating a value
112 index++;
113 }
114
115
116 string_parse_tree(short value = -1) :
117 m_value(value)
118 {}
119 ptree_coll m_next_chars;
120 short m_value;
121
122 void insert(const string_type& s, unsigned short value)
123 {
124 unsigned int i = 0;
125 iterator ti;
126 while(i < s.size()) {
127 if (i==0) {
128 if (i == (s.size()-1)) {
129 ti = m_next_chars.insert(value_type(s[i],
130 string_parse_tree<charT>(value)));
131 }
132 else {
133 ti = m_next_chars.insert(value_type(s[i],
134 string_parse_tree<charT>()));
135 }
136 }
137 else {
138 if (i == (s.size()-1)) {
139 ti = ti->second.m_next_chars.insert(value_type(s[i],
140 string_parse_tree<charT>(value)));
141 }
142
143 else {
144 ti = ti->second.m_next_chars.insert(value_type(s[i],
145 string_parse_tree<charT>()));
146 }
147
148 }
149 i++;
150 }
151 }
152
153
154 //! Recursive function that finds a matching string in the tree.
155 /*! Must check match_results::has_remaining() after match() is
156 * called. This is required so the user can determine if
157 * stream iterator is already pointing to the expected
158 * character or not (match() might advance sitr to next char in stream).
159 *
160 * A parse_match_result that has been returned from a failed match
161 * attempt can be sent in to the match function of a different
162 * string_parse_tree to attempt a match there. Use the iterators
163 * for the partially consumed stream, the parse_match_result object,
164 * and '0' for the level parameter. */
165 short
166 match(std::istreambuf_iterator<charT>& sitr,
167 std::istreambuf_iterator<charT>& stream_end,
168 parse_match_result_type& result,
169 unsigned int& level) const
170 {
171
172 level++;
173 charT c;
174 // if we conditionally advance sitr, we won't have
175 // to consume the next character past the input
176 bool adv_itr = true;
177 if (level > result.cache.size()) {
178 if (sitr == stream_end) return 0; //bail - input exhausted
179 c = static_cast<charT>(std::tolower(*sitr));
180 //result.cache += c;
181 //sitr++;
182 }
183 else {
184 // if we're looking for characters from the cache,
185 // we don't want to increment sitr
186 adv_itr = false;
187 c = static_cast<charT>(std::tolower(result.cache[level-1]));
188 }
189 const_iterator litr = m_next_chars.lower_bound(c);
190 const_iterator uitr = m_next_chars.upper_bound(c);
191 while (litr != uitr) { // equal if not found
192 if(adv_itr) {
193 sitr++;
194 result.cache += c;
195 }
196 if (litr->second.m_value != -1) { // -1 is default value
197 if (result.match_depth < level) {
198 result.current_match = litr->second.m_value;
199 result.match_depth = static_cast<unsigned short>(level);
200 }
201 litr->second.match(sitr, stream_end,
202 result, level);
203 level--;
204 }
205 else {
206 litr->second.match(sitr, stream_end,
207 result, level);
208 level--;
209 }
210
211 if(level <= result.cache.size()) {
212 adv_itr = false;
213 }
214
215 litr++;
216 }
217 return result.current_match;
218
219 }
220
221 /*! Must check match_results::has_remaining() after match() is
222 * called. This is required so the user can determine if
223 * stream iterator is already pointing to the expected
224 * character or not (match() might advance sitr to next char in stream).
225 */
226 parse_match_result_type
227 match(std::istreambuf_iterator<charT>& sitr,
228 std::istreambuf_iterator<charT>& stream_end) const
229 {
230 // lookup to_lower of char in tree.
231 unsigned int level = 0;
232 // string_type cache;
233 parse_match_result_type result;
234 match(sitr, stream_end, result, level);
235 return result;
236 }
237
238 void printme(std::ostream& os, int& level)
239 {
240 level++;
241 iterator itr = m_next_chars.begin();
242 iterator end = m_next_chars.end();
243 // os << "starting level: " << level << std::endl;
244 while (itr != end) {
245 os << "level: " << level
246 << " node: " << itr->first
247 << " value: " << itr->second.m_value
248 << std::endl;
249 itr->second.printme(os, level);
250 itr++;
251 }
252 level--;
253 }
254
255 void print(std::ostream& os)
256 {
257 int level = 0;
258 printme(os, level);
259 }
260
261 void printmatch(std::ostream& os, charT c)
262 {
263 iterator litr = m_next_chars.lower_bound(c);
264 iterator uitr = m_next_chars.upper_bound(c);
265 os << "matches for: " << c << std::endl;
266 while (litr != uitr) {
267 os << " node: " << litr->first
268 << " value: " << litr->second.m_value
269 << std::endl;
270 litr++;
271 }
272 }
273
274};
275
276
277} } //namespace
278#endif
279

source code of boost/boost/date_time/string_parse_tree.hpp