1 | #ifndef _DATE_TIME_DATE_PARSING_HPP___ |
2 | #define _DATE_TIME_DATE_PARSING_HPP___ |
3 | |
4 | /* Copyright (c) 2002,2003,2005 CrystalClear Software, Inc. |
5 | * Use, modification and distribution is subject to the |
6 | * Boost Software License, Version 1.0. (See accompanying |
7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
8 | * Author: Jeff Garland, Bart Garst |
9 | * $Date$ |
10 | */ |
11 | |
12 | #include <map> |
13 | #include <string> |
14 | #include <sstream> |
15 | #include <iterator> |
16 | #include <algorithm> |
17 | #include <boost/tokenizer.hpp> |
18 | #include <boost/lexical_cast.hpp> |
19 | #include <boost/date_time/compiler_config.hpp> |
20 | #include <boost/date_time/parse_format_base.hpp> |
21 | #include <boost/date_time/period.hpp> |
22 | |
23 | #if defined(BOOST_DATE_TIME_NO_LOCALE) |
24 | #include <cctype> // ::tolower(int) |
25 | #else |
26 | #include <locale> // std::tolower(char, locale) |
27 | #endif |
28 | |
29 | namespace boost { |
30 | namespace date_time { |
31 | |
//! A function to replace the std::transform( , , ,tolower) construct
/*! Returns a copy of \c inp in which every character has been converted
 * to lowercase. The conversion uses the classic "C" locale
 * (std::locale::classic()), so the result does not depend on the global
 * locale. In the event that a compiler does not support locales
 * (BOOST_DATE_TIME_NO_LOCALE is defined), the old C style tolower()
 * is used instead.
 *
 * \param inp String to convert; taken by value and modified in place.
 * \return The lowercased string.
 */
inline
std::string
convert_to_lower(std::string inp)
{
#if !defined(BOOST_DATE_TIME_NO_LOCALE)
  const std::locale loc(std::locale::classic());
#endif
  for (std::string::size_type i = 0, n = inp.length(); i < n; ++i) {
    inp[i] =
#if defined(BOOST_DATE_TIME_NO_LOCALE)
    // Go through unsigned char: handing a negative char value to the
    // <cctype> tolower() is undefined behavior.
    static_cast<char>(std::tolower(static_cast<unsigned char>(inp[i])));
#else
    // tolower and others were brought in to std for borland >= v564
    // in compiler_config.hpp
    std::tolower(inp[i], loc);
#endif
  }
  return inp;
}
58 | |
59 | //! Helper function for parse_date. |
60 | template<class month_type> |
61 | inline unsigned short |
62 | month_str_to_ushort(std::string const& s) { |
63 | if((s.at(n: 0) >= '0') && (s.at(n: 0) <= '9')) { |
64 | return boost::lexical_cast<unsigned short>(arg: s); |
65 | } |
66 | else { |
67 | std::string str = convert_to_lower(inp: s); |
68 | //c++98 support |
69 | #if defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX) |
70 | static std::map<std::string, unsigned short> month_map; |
71 | typedef std::map<std::string, unsigned short>::value_type vtype; |
72 | if( month_map.empty() ) { |
73 | month_map.insert( vtype("jan" , static_cast<unsigned short>(1)) ); |
74 | month_map.insert( vtype("january" , static_cast<unsigned short>(1)) ); |
75 | month_map.insert( vtype("feb" , static_cast<unsigned short>(2)) ); |
76 | month_map.insert( vtype("february" , static_cast<unsigned short>(2)) ); |
77 | month_map.insert( vtype("mar" , static_cast<unsigned short>(3)) ); |
78 | month_map.insert( vtype("march" , static_cast<unsigned short>(3)) ); |
79 | month_map.insert( vtype("apr" , static_cast<unsigned short>(4)) ); |
80 | month_map.insert( vtype("april" , static_cast<unsigned short>(4)) ); |
81 | month_map.insert( vtype("may" , static_cast<unsigned short>(5)) ); |
82 | month_map.insert( vtype("jun" , static_cast<unsigned short>(6)) ); |
83 | month_map.insert( vtype("june" , static_cast<unsigned short>(6)) ); |
84 | month_map.insert( vtype("jul" , static_cast<unsigned short>(7)) ); |
85 | month_map.insert( vtype("july" , static_cast<unsigned short>(7)) ); |
86 | month_map.insert( vtype("aug" , static_cast<unsigned short>(8)) ); |
87 | month_map.insert( vtype("august" , static_cast<unsigned short>(8)) ); |
88 | month_map.insert( vtype("sep" , static_cast<unsigned short>(9)) ); |
89 | month_map.insert( vtype("september" , static_cast<unsigned short>(9)) ); |
90 | month_map.insert( vtype("oct" , static_cast<unsigned short>(10)) ); |
91 | month_map.insert( vtype("october" , static_cast<unsigned short>(10)) ); |
92 | month_map.insert( vtype("nov" , static_cast<unsigned short>(11)) ); |
93 | month_map.insert( vtype("november" , static_cast<unsigned short>(11)) ); |
94 | month_map.insert( vtype("dec" , static_cast<unsigned short>(12)) ); |
95 | month_map.insert( vtype("december" , static_cast<unsigned short>(12)) ); |
96 | } |
97 | #else //c+11 and beyond |
98 | static std::map<std::string, unsigned short> month_map = |
99 | { { "jan" , static_cast<unsigned short>(1) }, { "january" , static_cast<unsigned short>(1) }, |
100 | { "feb" , static_cast<unsigned short>(2) }, { "february" , static_cast<unsigned short>(2) }, |
101 | { "mar" , static_cast<unsigned short>(3) }, { "march" , static_cast<unsigned short>(3) }, |
102 | { "apr" , static_cast<unsigned short>(4) }, { "april" , static_cast<unsigned short>(4) }, |
103 | { "may" , static_cast<unsigned short>(5) }, |
104 | { "jun" , static_cast<unsigned short>(6) }, { "june" , static_cast<unsigned short>(6) }, |
105 | { "jul" , static_cast<unsigned short>(7) }, { "july" , static_cast<unsigned short>(7) }, |
106 | { "aug" , static_cast<unsigned short>(8) }, { "august" , static_cast<unsigned short>(8) }, |
107 | { "sep" , static_cast<unsigned short>(9) }, { "september" , static_cast<unsigned short>(9) }, |
108 | { "oct" , static_cast<unsigned short>(10) }, { "october" , static_cast<unsigned short>(10)}, |
109 | { "nov" , static_cast<unsigned short>(11) }, { "november" , static_cast<unsigned short>(11)}, |
110 | { "dec" , static_cast<unsigned short>(12) }, { "december" , static_cast<unsigned short>(12)} |
111 | }; |
112 | #endif |
113 | std::map<std::string, unsigned short>::const_iterator mitr = month_map.find( x: str ); |
114 | if ( mitr != month_map.end() ) { |
115 | return mitr->second; |
116 | } |
117 | } |
118 | return 13; // intentionally out of range - name not found |
119 | } |
120 | |
121 | |
122 | //! Generic function to parse a delimited date (eg: 2002-02-10) |
123 | /*! Accepted formats are: "2003-02-10" or " 2003-Feb-10" or |
124 | * "2003-Feburary-10" |
125 | * The order in which the Month, Day, & Year appear in the argument |
126 | * string can be accomodated by passing in the appropriate ymd_order_spec |
127 | */ |
128 | template<class date_type> |
129 | date_type |
130 | parse_date(const std::string& s, int order_spec = ymd_order_iso) { |
131 | std::string spec_str; |
132 | if(order_spec == ymd_order_iso) { |
133 | spec_str = "ymd" ; |
134 | } |
135 | else if(order_spec == ymd_order_dmy) { |
136 | spec_str = "dmy" ; |
137 | } |
138 | else { // (order_spec == ymd_order_us) |
139 | spec_str = "mdy" ; |
140 | } |
141 | |
142 | typedef typename date_type::month_type month_type; |
143 | unsigned pos = 0; |
144 | unsigned short year(0), month(0), day(0); |
145 | typedef typename std::basic_string<char>::traits_type traits_type; |
146 | typedef boost::char_separator<char, traits_type> char_separator_type; |
147 | typedef boost::tokenizer<char_separator_type, |
148 | std::basic_string<char>::const_iterator, |
149 | std::basic_string<char> > tokenizer; |
150 | typedef boost::tokenizer<char_separator_type, |
151 | std::basic_string<char>::const_iterator, |
152 | std::basic_string<char> >::iterator tokenizer_iterator; |
153 | // may need more delimiters, these work for the regression tests |
154 | const char sep_char[] = {',','-','.',' ','/','\0'}; |
155 | char_separator_type sep(sep_char); |
156 | tokenizer tok(s,sep); |
157 | for(tokenizer_iterator beg=tok.begin(); |
158 | beg!=tok.end() && pos < spec_str.size(); |
159 | ++beg, ++pos) { |
160 | switch(spec_str.at(n: pos)) { |
161 | case 'y': |
162 | { |
163 | year = boost::lexical_cast<unsigned short>(arg: *beg); |
164 | break; |
165 | } |
166 | case 'm': |
167 | { |
168 | month = month_str_to_ushort<month_type>(*beg); |
169 | break; |
170 | } |
171 | case 'd': |
172 | { |
173 | day = boost::lexical_cast<unsigned short>(arg: *beg); |
174 | break; |
175 | } |
176 | default: break; |
177 | } //switch |
178 | } |
179 | return date_type(year, month, day); |
180 | } |
181 | |
182 | //! Generic function to parse undelimited date (eg: 20020201) |
183 | template<class date_type> |
184 | date_type |
185 | parse_undelimited_date(const std::string& s) { |
186 | int offsets[] = {4,2,2}; |
187 | int pos = 0; |
188 | //typename date_type::ymd_type ymd((year_type::min)(),1,1); |
189 | unsigned short y = 0, m = 0, d = 0; |
190 | |
191 | /* The two bool arguments state that parsing will not wrap |
192 | * (only the first 8 characters will be parsed) and partial |
193 | * strings will not be parsed. |
194 | * Ex: |
195 | * "2005121" will parse 2005 & 12, but not the "1" */ |
196 | boost::offset_separator osf(offsets, offsets+3, false, false); |
197 | |
198 | typedef typename boost::tokenizer<boost::offset_separator, |
199 | std::basic_string<char>::const_iterator, |
200 | std::basic_string<char> > tokenizer_type; |
201 | tokenizer_type tok(s, osf); |
202 | for(typename tokenizer_type::iterator ti=tok.begin(); ti!=tok.end();++ti) { |
203 | unsigned short i = boost::lexical_cast<unsigned short>(arg: *ti); |
204 | switch(pos) { |
205 | case 0: y = i; break; |
206 | case 1: m = i; break; |
207 | case 2: d = i; break; |
208 | default: break; |
209 | } |
210 | pos++; |
211 | } |
212 | return date_type(y,m,d); |
213 | } |
214 | |
215 | //! Helper function for 'date gregorian::from_stream()' |
216 | /*! Creates a string from the iterators that reference the |
217 | * begining & end of a char[] or string. All elements are |
218 | * used in output string */ |
219 | template<class date_type, class iterator_type> |
220 | inline |
221 | date_type |
222 | from_stream_type(iterator_type& beg, |
223 | iterator_type const& end, |
224 | char) |
225 | { |
226 | std::ostringstream ss; |
227 | while(beg != end) { |
228 | ss << *beg++; |
229 | } |
230 | return parse_date<date_type>(ss.str()); |
231 | } |
232 | |
233 | //! Helper function for 'date gregorian::from_stream()' |
234 | /*! Returns the first string found in the stream referenced by the |
235 | * begining & end iterators */ |
236 | template<class date_type, class iterator_type> |
237 | inline |
238 | date_type |
239 | from_stream_type(iterator_type& beg, |
240 | iterator_type const& /* end */, |
241 | std::string const&) |
242 | { |
243 | return parse_date<date_type>(*beg); |
244 | } |
245 | |
246 | /* I believe the wchar stuff would be best elsewhere, perhaps in |
247 | * parse_date<>()? In the mean time this gets us started... */ |
248 | //! Helper function for 'date gregorian::from_stream()' |
249 | /*! Creates a string from the iterators that reference the |
250 | * begining & end of a wstring. All elements are |
251 | * used in output string */ |
252 | template<class date_type, class iterator_type> |
253 | inline |
254 | date_type from_stream_type(iterator_type& beg, |
255 | iterator_type const& end, |
256 | wchar_t) |
257 | { |
258 | std::ostringstream ss; |
259 | #if !defined(BOOST_DATE_TIME_NO_LOCALE) |
260 | std::locale loc; |
261 | std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc); |
262 | while(beg != end) { |
263 | ss << fac.narrow(*beg++, 'X'); // 'X' will cause exception to be thrown |
264 | } |
265 | #else |
266 | while(beg != end) { |
267 | char c = 'X'; // 'X' will cause exception to be thrown |
268 | const wchar_t wc = *beg++; |
269 | if (wc >= 0 && wc <= 127) |
270 | c = static_cast< char >(wc); |
271 | ss << c; |
272 | } |
273 | #endif |
274 | return parse_date<date_type>(ss.str()); |
275 | } |
276 | #ifndef BOOST_NO_STD_WSTRING |
277 | //! Helper function for 'date gregorian::from_stream()' |
278 | /*! Creates a string from the first wstring found in the stream |
279 | * referenced by the begining & end iterators */ |
280 | template<class date_type, class iterator_type> |
281 | inline |
282 | date_type |
283 | from_stream_type(iterator_type& beg, |
284 | iterator_type const& /* end */, |
285 | std::wstring const&) { |
286 | std::wstring ws = *beg; |
287 | std::ostringstream ss; |
288 | std::wstring::iterator wsb = ws.begin(), wse = ws.end(); |
289 | #if !defined(BOOST_DATE_TIME_NO_LOCALE) |
290 | std::locale loc; |
291 | std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc); |
292 | while(wsb != wse) { |
293 | ss << fac.narrow(c: *wsb++, dfault: 'X'); // 'X' will cause exception to be thrown |
294 | } |
295 | #else |
296 | while(wsb != wse) { |
297 | char c = 'X'; // 'X' will cause exception to be thrown |
298 | const wchar_t wc = *wsb++; |
299 | if (wc >= 0 && wc <= 127) |
300 | c = static_cast< char >(wc); |
301 | ss << c; |
302 | } |
303 | #endif |
304 | return parse_date<date_type>(ss.str()); |
305 | } |
306 | #endif // BOOST_NO_STD_WSTRING |
307 | #if (defined(BOOST_MSVC) && (_MSC_VER < 1300)) |
// This function cannot be compiled with MSVC 6.0 due to internal compiler shortcomings
309 | #else |
310 | //! function called by wrapper functions: date_period_from_(w)string() |
311 | template<class date_type, class charT> |
312 | period<date_type, typename date_type::duration_type> |
313 | from_simple_string_type(const std::basic_string<charT>& s){ |
314 | typedef typename std::basic_string<charT>::traits_type traits_type; |
315 | typedef typename boost::char_separator<charT, traits_type> char_separator; |
316 | typedef typename boost::tokenizer<char_separator, |
317 | typename std::basic_string<charT>::const_iterator, |
318 | std::basic_string<charT> > tokenizer; |
319 | const charT sep_list[4] = {'[','/',']','\0'}; |
320 | char_separator sep(sep_list); |
321 | tokenizer tokens(s, sep); |
322 | typename tokenizer::iterator tok_it = tokens.begin(); |
323 | std::basic_string<charT> date_string = *tok_it; |
324 | // get 2 string iterators and generate a date from them |
325 | typename std::basic_string<charT>::iterator date_string_start = date_string.begin(), |
326 | date_string_end = date_string.end(); |
327 | typedef typename std::iterator_traits<typename std::basic_string<charT>::iterator>::value_type value_type; |
328 | date_type d1 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); |
329 | date_string = *(++tok_it); // next token |
330 | date_string_start = date_string.begin(), date_string_end = date_string.end(); |
331 | date_type d2 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); |
332 | return period<date_type, typename date_type::duration_type>(d1, d2); |
333 | } |
334 | #endif |
335 | |
336 | } } //namespace date_time |
337 | |
338 | |
339 | |
340 | |
341 | #endif |
342 | |
343 | |